%Meccano for Peauc, large cube tetrahedron octahedron, Enigma paper
\documentclass[a4paper,12pt]{article}
\usepackage{amsmath,amsxtra,amsthm,amssymb,makeidx,graphicx}
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{example}[theorem]{Example}
\newtheorem{exercise}[theorem]{Exercise}
\newtheorem{convention}[theorem]{Convention}
\newtheorem{fact}[theorem]{Fact}
\theoremstyle{remark}
\newtheorem{question}[theorem]{Exercise}
\newcommand{\Orb}{\operatorname{Orb}}
\newcommand{\Stab}{\operatorname{Stab}}
\newcommand{\lcm}{\operatorname{lcm}}
\newcommand{\tr}{\operatorname{tr}}
\newcommand{\gen}{\operatorname{gen}}
\renewcommand{\emptyset}{\varnothing}
\begin{document}
\title{Groups and Geometry\\
The Second Part\\of\\Algebra and Geometry}
\author{T.W.K\"{o}rner}
\maketitle
\begin{footnotesize}
\noindent
{\bf Small print} The syllabus for the course is defined by
the Faculty Board Schedules (which are minimal for lecturing
and maximal for examining). What is presented here contains
some results which
it would not, in my opinion, be
fair to set as book-work although they could well appear
as problems. (A book-work question asks you for
material which you are supposed to know. A problem
question asks you to use known material to solve
a question which you may not have seen before. A typical
Cambridge examination question consists of book-work
followed by a problem, the so-called `rider',
which makes use of the book-work.)
I should {\bf very much} appreciate being told
of any corrections or possible improvements
and might even part with a small reward to the
first finder of particular errors.
These notes are written in
\LaTeXe\ and should be available
in tex, ps, pdf and dvi format
from my home page
\begin{center}
{\bf http://www.dpmms.cam.ac.uk/\~{}twk/}
\end{center}
My e-mail address is \verb+twk@dpmms+.
Supervisors can obtain notes on the exercises
in the last four sections from the DPMMS secretaries
or by e-mailing me for the dvi file.
\end{footnotesize}
\tableofcontents
\section{Preliminary remarks}
\begin{convention} We shall write ${\mathbb F}$ to mean either
${\mathbb C}$ or ${\mathbb R}$.
\end{convention}
Our motto in this course is
`linear maps for understanding, matrices for computation'.
We recall some definitions and theorems from earlier on.
\begin{definition} Let
$\alpha:{\mathbb F}^{n}\rightarrow{\mathbb F}^{n}$ be linear
and let ${\mathbf e}_{1}$, ${\mathbf e}_{2}$, \dots,
${\mathbf e}_{n}$ be a basis. Then the matrix $A=(a_{ij})$ of
$\alpha$ with respect to this basis is given by the rule
\[\alpha({\mathbf e}_{j})=\sum_{i=1}^{n}a_{ij}{\mathbf e}_{i}.\]
\end{definition}
We observe that, if ${\mathbf x}=\sum_{j=1}^{n}x_{j}{\mathbf e}_{j}$
and $\alpha({\mathbf x})={\mathbf y}=
\sum_{i=1}^{n}y_{i}{\mathbf e}_{i}$, then
\[y_{i}=\sum_{j=1}^{n}a_{ij}x_{j}.\]
Thus coordinates and bases go opposite ways. The definition
chosen is conventional but represents a universal convention
and must be learnt.
\begin{theorem}{\bf (Change of basis.)} Let
$\alpha:{\mathbb F}^{n}\rightarrow{\mathbb F}^{n}$ be a linear map.
If $\alpha$ has matrix $A=(a_{ij})$
with respect to a basis ${\mathbf e}_{1}$, ${\mathbf e}_{2}$, \dots,
${\mathbf e}_{n}$ and matrix $B=(b_{ij})$
with respect to a basis ${\mathbf f}_{1}$, ${\mathbf f}_{2}$, \dots,
${\mathbf f}_{n}$, then there is an invertible $n\times n$
matrix $P$ such that
\[B=P^{-1}AP.\]
The matrix $P=(p_{ij})$ is given by the rule
\[{\mathbf f}_{j}=\sum_{i=1}^{n}p_{ij}{\mathbf e}_{i}.\]
\end{theorem}
We recall an important application of this result. Since
\[\det(P^{-1}AP)=(\det P)^{-1}\det A\det P=\det A,\]
we see that all matrix representations of a given linear map
$\alpha:{\mathbb F}^{n}\rightarrow{\mathbb F}^{n}$
have the same determinant. We can thus write $\det\alpha=\det A$
where $A$ is any matrix representation of $\alpha$.
Although we shall not conduct any explicit calculations,
I shall assume that my audience is familiar with
the process of Gaussian elimination both as a method of
solving linear equations and of inverting square matrices.
(If the previous lecturer has not covered these topics,
I will.)
The following observation is quite useful.
\begin{example} If ${\mathbf e}_{1}$, ${\mathbf e}_{2}$,
\dots, ${\mathbf e}_{n}$ is the standard basis (that is
to say ${\mathbf e}_{j}$ is the column vector with
$1$ in the $j$th place and zero elsewhere), then
the matrix $A$ of a linear map $\alpha$ with respect to
this basis has $\alpha({\mathbf e}_{j})$ as its
$j$th column.
\end{example}
\section{Eigenvectors and eigenvalues}
\begin{definition} If $\alpha:{\mathbb F}^{n}\rightarrow {\mathbb F}^{n}$
is linear and $\alpha({\mathbf u})=\lambda{\mathbf u}$ for some
vector ${\mathbf u}\neq{\mathbf 0}$ and some $\lambda\in{\mathbb F}$,
we say that ${\mathbf u}$ is an eigenvector of $\alpha$
with eigenvalue $\lambda$.
\end{definition}
\begin{theorem}
If $\alpha:{\mathbb F}^{n}\rightarrow {\mathbb F}^{n}$
is linear, then $\lambda$ is an eigenvalue of $\alpha$ if and
only if $\det(\lambda\iota-\alpha)=0$.
\end{theorem}
\begin{lemma} If $n=3$, then any linear map
$\alpha:{\mathbb R}^{3}\rightarrow {\mathbb R}^{3}$ has
an eigenvector. It follows that there exists some line
$l$ through ${\mathbf 0}$ with $\alpha(l)\subseteq l$.
\end{lemma}
\begin{example} Let
$R_{\theta}:{\mathbb R}^{2}\rightarrow {\mathbb R}^{2}$
be the linear map given by rotation through $\theta$
about ${\mathbf 0}$. Then $R_{\theta}$ has no eigenvectors
unless $\theta\equiv 0\mod{\pi}$. If $\theta\equiv \pi\mod{2\pi}$,
then every vector in ${\mathbb R}^{2}\setminus\{{\mathbf 0}\}$
is an eigenvector with eigenvalue $-1$.
If $\theta\equiv 0\mod{2\pi}$,
then every vector in ${\mathbb R}^{2}\setminus\{{\mathbf 0}\}$
is an eigenvector with eigenvalue $1$.
\end{example}
\begin{theorem}{\bf (Fundamental Theorem of Algebra.)}
If $n\geq 1$ and $a_{j}\in{\mathbb C}$ $[j=0,1,\dots,n]$ with
$a_{n}\neq 0$, then the equation
\[\sum_{j=0}^{n}a_{j}z^{j}=0\]
has a solution in ${\mathbb C}$.
\end{theorem}
The Fundamental Theorem of Algebra is, in fact, a theorem of
analysis and its proof is one of the many high spots of
the complex variable theory lectures next year.
We note the following corollary.
\begin{lemma} If $n\geq 1$ and $a_{j}\in{\mathbb C}$ $[j=0,1,\dots,n-1]$,
then we can find
$\omega_{1}$, $\omega_{2}$, \dots, $\omega_{n}\in {\mathbb C}$
such that
\[z^{n}+\sum_{j=0}^{n-1}a_{j}z^{j}=\prod_{j=1}^{n}(z-\omega_{j}).\]
\end{lemma}
If $(z-\omega)^{k}$ is a factor of $z^{n}+\sum_{j=0}^{n-1}a_{j}z^{j}$,
but $(z-\omega)^{k+1}$ is not, we say that $\omega$ is a $k$ times
repeated root of $z^{n}+\sum_{j=0}^{n-1}a_{j}z^{j}$.
\begin{lemma} Any linear map
$\alpha:{\mathbb C}^{n}\rightarrow {\mathbb C}^{n}$ has
an eigenvector. It follows that there exists
a one dimensional complex subspace
\[l=\{w{\mathbf e}:w\in{\mathbb C}\}\]
(where ${\mathbf e}\neq{\mathbf 0}$) with $\alpha(l)\subseteq l$.
\end{lemma}
\begin{theorem}
Suppose $\alpha:{\mathbb F}^{n}\rightarrow {\mathbb F}^{n}$
is linear. Then $\alpha$ has diagonal matrix $D$ with respect
to a basis ${\mathbf e}_{1}$, ${\mathbf e}_{2}$, \dots,
${\mathbf e}_{n}$ if and only if the ${\mathbf e}_{j}$
are eigenvectors. The diagonal entries $d_{ii}$ of $D$ are
the eigenvalues of the ${\mathbf e}_{i}$.
\end{theorem}
If $\alpha$ has a diagonal matrix with respect to some basis we
say that $\alpha$ is diagonalisable.
\begin{theorem}\label{Distinct} If a linear map
$\alpha:{\mathbb F}^{n}\rightarrow {\mathbb F}^{n}$
has $n$ distinct eigenvalues, then the associated eigenvectors
form a basis and $\alpha$ has a diagonal matrix with respect
to this basis.
\end{theorem}
I shall prove this result for $n\leq 3$. It will be obvious
from the proof that the result holds for all $n$, but the
general result is best approached via the machinery of
next year's linear algebra course.
Theorem~\ref{Distinct} gives a sufficient but not a necessary
condition for a linear map to be diagonalisable. The
identity map $\iota:{\mathbb F}^{n}\rightarrow {\mathbb F}^{n}$
has only one eigenvalue but has the diagonal matrix $I$
with respect to any basis. On the other hand even when
we work in ${\mathbb C}^{n}$ rather than ${\mathbb R}^{n}$
not every linear map is diagonalisable.
\begin{example}\label{non diagonalisable}
Let ${\mathbf e}_{1}$, ${\mathbf e}_{2}$
be a basis for ${\mathbb F}^{2}$. The linear map
$\beta:{\mathbb F}^{2}\rightarrow {\mathbb F}^{2}$
given by
\[\beta(x_{1}{\mathbf e}_{1}+x_{2}{\mathbf e}_{2})
=x_{2}{\mathbf e}_{1}\]
is non-diagonalisable.
\end{example}
Fortunately the map just given is the `typical' non-diagonalisable
linear map for ${\mathbb C}^{2}$.
\begin{theorem}\label{canonical}
If $\alpha:{\mathbb C}^{2}\rightarrow {\mathbb C}^{2}$
is linear,
then exactly one of the following three things must happen.
(i) $\alpha$ has two distinct eigenvalues $\lambda$ and $\mu$
and we can take a basis of eigenvectors
${\mathbf e}_{1}$, ${\mathbf e}_{2}$
for ${\mathbb C}^{2}$. With respect to this basis, $\alpha$ has
matrix
\[\begin{pmatrix}\lambda&0\\0&\mu\end{pmatrix}.\]
(ii) $\alpha$ has only one distinct eigenvalue $\lambda$
but is diagonalisable. Then $\alpha=\lambda\iota$ and
has matrix
\[\begin{pmatrix}\lambda&0\\0&\lambda\end{pmatrix}\]
with respect to any basis.
(iii) $\alpha$ has only one distinct eigenvalue $\lambda$
and is not diagonalisable.
Then there exists a basis ${\mathbf e}_{1}$, ${\mathbf e}_{2}$
for ${\mathbb C}^{2}$ with respect to which $\alpha$ has
matrix
\[\begin{pmatrix}\lambda&1\\0&\lambda\end{pmatrix}.\]
Note that ${\mathbf e}_{1}$ is an eigenvector with eigenvalue
$\lambda$ but ${\mathbf e}_{2}$ is not.
\end{theorem}
The general case of a linear map
$\alpha:{\mathbb C}^{n}\rightarrow {\mathbb C}^{n}$
is substantially more complicated. The
possible outcomes are classified
using the `Jordan Normal Form' in a theorem that is easy
to state and understand but tricky to prove.
We have the following corollary to Theorem~\ref{canonical}.
\begin{example}{\bf (Cayley--Hamilton in 2 dimensions.)}%
\label{Cayley--Hamilton}
If $\alpha:{\mathbb C}^{2}\rightarrow{\mathbb C}^{2}$
is a linear map,
let us write $Q(t)=\det(t\iota-\alpha)$. Then we have
\[Q(t)=t^{2}+at+b\]
where $a,\,b\in{\mathbb C}$. The Cayley--Hamilton
theorem states that
\[\alpha^{2}+a\alpha+b\iota=O\]
or, more briefly\footnote{But more confusingly for the novice.},
that $Q(\alpha)=O$.
\end{example}
We call $Q$ the characteristic
polynomial of $\alpha$ and say that $\alpha$ satisfies its own
characteristic equation. Once again the result is much harder to prove
in higher dimensions. (If you find Example~\ref{Cayley--Hamilton}
hard, note that it is merely an example and not central to the course.)
\section{Computation} Let us move from ideas to computation.
\begin{theorem}\label{diagonal} The following two statements
about an $n\times n$ matrix $A$ over ${\mathbb F}$ are
equivalent.
(i) If we choose a basis
${\mathbf u}_{1}$, ${\mathbf u}_{2}$, \dots,
${\mathbf u}_{n}$ for ${\mathbb F}^{n}$ and define a linear map
$\alpha:{\mathbb F}^{n}\rightarrow{\mathbb F}^{n}$ by
\[\alpha({\mathbf u}_{j})=\sum_{i=1}^{n}a_{ij}{\mathbf u}_{i},\]
then we can find a basis
${\mathbf e}_{1}$, ${\mathbf e}_{2}$, \dots,
${\mathbf e}_{n}$ for ${\mathbb F}^{n}$ and $d_{i}\in{\mathbb F}$
such that
\[\alpha({\mathbf e}_{j})=d_{j}{\mathbf e}_{j}.\]
(ii) There is a non-singular $n\times n$ matrix $P$
such that $P^{-1}AP$ is diagonal.
\end{theorem}
If the conditions of Theorem~\ref{diagonal} hold,
we say that $A$ is diagonalisable. (Thus a matrix $A$
is diagonalisable if and only if it represents a diagonalisable
linear map with respect to some basis.)
As an indication of
why diagonalisation is likely to be useful, observe
that if $A$, $P$, $D$ are $n\times n$ matrices with $P$ invertible,
$D$ diagonal and $P^{-1}AP=D$, then
\[A^{m}=(PDP^{-1})^{m}=PDP^{-1}PDP^{-1}\dots PDP^{-1}=PD^{m}P^{-1}\]
and note how easy it is to compute $D^{m}$. Here is
an example
of why it might be useful to compute powers of matrices.
\begin{example} Let $n$ towns be called (rather uninterestingly)
$1$, $2$, \dots, $n$. Write $a_{ij}=1$ if there is a road
leading directly from town $i$ to town $j$ and $a_{ij}=0$
otherwise (we take $a_{ii}=0$). If we write $A^{m}=(a_{ij}^{(m)})$
then $a_{ij}^{(m)}$ is the number of routes from $i$ to $j$
of length $m$. (A route of length $m$ passes through $m+1$
towns including the starting and finishing towns.
If you pass through the
same town more than once each visit is counted separately.)
\end{example}
In the discussion that
follows, we take the basis vectors ${\mathbf u}_{j}$
to be the standard column vectors of length $n$
with entries 0 except in the $j$th place where we have 1.
Recall that any $n\times n$ matrix $A$ gives rise to a linear
map $\alpha$ by the rule
\[\alpha({\mathbf u}_{j})=\sum_{i=1}^{n}a_{ij}{\mathbf u}_{i}.\]
Suppose that we wish to `diagonalise'
such an $n\times n$ matrix.
The first step is to look at the roots of the
characteristic polynomial
\[P(t)=\det(tI-A).\]
If we work over ${\mathbb R}$ and some of the roots of $P$
are not real, we know at once that $A$ is not diagonalisable
(over ${\mathbb R}$). If we work over ${\mathbb C}$ or
if we
work over ${\mathbb R}$ and all the roots are real, we can move on to
the next stage. Either the characteristic polynomial has
$n$ distinct roots or it does not. If it does, we know
that $A$ is diagonalisable.
If we find
the $n$ distinct roots (easier said than done outside
the artificial conditions of the examination room)
$\lambda_{1}$, $\lambda_{2}$, \dots, $\lambda_{n}$
we know without further computation that there
exists a non-singular $P$ such that $P^{-1}AP=D$
where $D$ is a diagonal matrix with diagonal entries
$\lambda_{j}$. Often knowledge of $D$ is sufficient
for our purposes but if not we proceed to find $P$ as
follows.
For each $\lambda_{j}$ we know that the system of $n$
linear equations in $n$ unknowns given by
\[(A-\lambda_{j}I){\mathbf x}={\mathbf 0}\]
(where ${\mathbf x}$ is a column vector of length $n$,
that is to say, with $n$ entries)
has non-zero solutions. Let ${\mathbf e}_{j}$ be one of them
so that
\[A{\mathbf e}_{j}=\lambda_{j}{\mathbf e}_{j}.\]
Note that, if
$P$ is the $n\times n$ matrix with $j$th column ${\mathbf e}_{j}$,
then $P{\mathbf u}_{j}={\mathbf e}_{j}$ and
\[P^{-1}AP{\mathbf u}_{j}=P^{-1}A{\mathbf e}_{j}
=\lambda_{j}P^{-1}{\mathbf e}_{j}=\lambda_{j}{\mathbf u}_{j}
=D{\mathbf u}_{j}\]
for all $1\leq j\leq n$ and so
\[P^{-1}AP=D.\]
If we need to know $P^{-1}$, we calculate it by inverting $P$
in some standard way.
\begin{exercise} Diagonalise the matrix
\[\begin{pmatrix}\cos\theta&-\sin\theta\\
\sin\theta&\cos\theta
\end{pmatrix}\]
(with $\theta$ real) over ${\mathbb C}$.
\end{exercise}
What if the characteristic polynomial does not have
$n$ distinct roots? In this case we do not know, without
further investigation, whether $A$ is diagonalisable or
not. Example~\ref{non diagonalisable} gives us
\[\begin{pmatrix}0&1\\0&0\end{pmatrix}\]
as an example of a non-diagonalisable matrix over ${\mathbb C}$.
This problem will be looked at further in
next year's linear algebra course. Later I will do a simple example
(the second matrix of Example~\ref{Repeated})
where the characteristic polynomial has repeated roots.
It cannot be emphasised too strongly that the method
described above bears the same relation to real life problems
as `Tom And Wendy Go Shopping' does to `King Lear'.
(But remember that you learn to read by reading
`Tom And Wendy Go Shopping' rather than `King Lear'.)
If $n=200$ then the characteristic polynomial is
likely to be extremely unpleasant.
We can now rewrite Theorem~\ref{canonical} as follows.
\begin{theorem}\label{canonical1}
If $A$ is a $2\times 2$ complex matrix
then exactly one of the following three things must happen.
(i) We can find a non-singular $2\times 2$ complex matrix
$P$ such that
\[P^{-1}AP=\begin{pmatrix}\lambda&0\\0&\mu\end{pmatrix}\]
with $\lambda\neq\mu$.
(ii) $A=\lambda I$ for some $\lambda$.
(iii) We can find a non-singular $2\times 2$ complex matrix
$P$ such that
\[P^{-1}AP=\begin{pmatrix}\lambda&1\\0&\lambda\end{pmatrix}\]
for some $\lambda$.
\end{theorem}
The following result links up with the first year course
on differential equations.
\begin{example} Consider the simultaneous differential equations
\begin{align*}
\dot{x}_{1}&=a_{11}x_{1}+a_{12}x_{2}\\
\dot{x}_{2}&=a_{21}x_{1}+a_{22}x_{2}.
\end{align*}
According as $A$ falls into one of the three cases given
in Theorem~\ref{canonical1}:
(i) $x_{1}(t)$ is a linear combination of $e^{\lambda t}$
and $e^{\mu t}$.
(ii) $x_{1}(t)=C_{1} e^{\lambda t}$,
$x_{2}(t)=C_{2} e^{\lambda t}$, with $C_{1}$ and $C_{2}$
arbitrary.
(iii) $x_{1}(t)$ is a linear combination of $e^{\lambda t}$
and $te^{\lambda t}$.
\end{example}
\section{Distance-preserving linear maps} We start with
a trivial example.
\begin{example} A restaurant serves $n$ different dishes.
The `meal vector' of a customer is the column
vector ${\mathbf x}=(x_{1},x_{2},\dots,x_{n})$ where
$x_{j}$ is the quantity of the $j$th dish ordered.
At the end of the meal, the waiter uses the
linear map $P:{\mathbb R}^{n}\rightarrow{\mathbb R}$
to obtain $P({\mathbf x})$ the amount (in pounds) the customer
must pay.
\end{example}
Although the `meal vectors' live in ${\mathbb R}^{n}$
it is not very useful to talk about the distance between
two meals. There are many other examples
where it is counter-productive to saddle ${\mathbb R}^{n}$
with things like distance and angle.
Equally there are many other occasions (particularly in the
study of the real world) when it makes sense to consider
${\mathbb R}^{n}$ equipped with the scalar product
(inner product)
\[\langle{\mathbf x},{\mathbf y}\rangle=\sum_{r=1}^{n}x_{r}y_{r},\]
the Euclidean norm
\[||{\mathbf x}||=\langle{\mathbf x},{\mathbf x}\rangle^{1/2}\]
(we take the positive square root) and Euclidean distance
\[\mbox{distance between ${\mathbf x}$ and ${\mathbf y}$}
=||{\mathbf x}-{\mathbf y}||.\]
\begin{definition}\label{S1} (i) We say that ${\mathbf a}$ and
${\mathbf b}$ are orthogonal if
$\langle{\mathbf a},{\mathbf b}\rangle=0$.
(ii) We say that ${\mathbf e}_{1}$, ${\mathbf e}_{2}$,
\dots, ${\mathbf e}_{n}$ are orthonormal if
\[\langle{\mathbf e}_{i},{\mathbf e}_{j}\rangle=\delta_{ij}
=\begin{cases}
1&\text{if $i=j$,}\\
0&\text{if $i\neq j$,}
\end{cases}\]
for all $1\leq i,j\leq n$.
\end{definition}
\begin{lemma}\label{S2} Any system of $n$ orthonormal vectors
${\mathbf e}_{1}$, ${\mathbf e}_{2}$,
\dots, ${\mathbf e}_{n}$ forms a basis for ${\mathbb R}^{n}$.
(We call this an orthonormal basis.) If
${\mathbf x}\in{\mathbb R}^{n}$, then
\[{\mathbf x}=\sum_{r=1}^{n}\langle{\mathbf x},{\mathbf e}_{r}\rangle
{\mathbf e}_{r}.\]
\end{lemma}
It will not have escaped the reader that the standard unit
vectors ${\mathbf e}_{j}$ (with $1$ as $j$th entry, $0$ everywhere
else) form an orthonormal basis\footnote{It will also not
have escaped the reader that sometimes I call the standard basis
${\mathbf e}_{j}$ and sometimes ${\mathbf u}_{j}$.
There is no fixed notation and you should always
say explicitly if you wish a particular set of vectors
to have a particular property.}.
The following remark is used repeatedly in studying inner products.
\begin{lemma}
If $\langle{\mathbf a},{\mathbf x}\rangle
=\langle{\mathbf b},{\mathbf x}\rangle$
for all ${\mathbf x}$ then ${\mathbf a}={\mathbf b}$.
\end{lemma}
Our first task will be to study those linear maps which
preserve length. Our main tool is a simple and rather pretty
equality.
\begin{lemma}\label{01} If ${\mathbf a},\,{\mathbf b}\in{\mathbb R}^{n}$,
then
\[||{\mathbf a}+{\mathbf b}||^{2}
-||{\mathbf a}-{\mathbf b}||^{2}=
4\langle{\mathbf a},{\mathbf b}\rangle.\]
\end{lemma}
We shall also need a definition.
\begin{definition}\label{02} If $A$ is the $n\times n$ matrix $(a_{ij})$,
then $A^{T}$ (the transpose of $A$) is the
$n\times n$ matrix $(b_{ij})$ with $b_{ij}=a_{ji}$
$[1\leq i,j\leq n]$.
\end{definition}
\begin{lemma}\label{02a} If the linear map
$\alpha:{\mathbb R}^{n}\rightarrow{\mathbb R}^{n}$
has matrix $A$ with respect to some
orthonormal basis and
$\alpha^{*}:{\mathbb R}^{n}\rightarrow{\mathbb R}^{n}$
is the linear map with matrix $A^{T}$ with respect
to the same basis, then
\begin{equation*}
\langle\alpha{\mathbf x},{\mathbf y}\rangle
=\langle{\mathbf x},\alpha^{*}{\mathbf y}\rangle
\end{equation*}
for all
${\mathbf x},{\mathbf y}\in{\mathbb R}^{n}$.
Further, if
the linear map
$\beta:{\mathbb R}^{n}\rightarrow{\mathbb R}^{n}$
satisfies
\begin{equation*}
\langle\alpha{\mathbf x},{\mathbf y}\rangle
=\langle{\mathbf x},\beta{\mathbf y}\rangle
\end{equation*}
for all
${\mathbf x},{\mathbf y}\in{\mathbb R}^{n}$,
then $\beta=\alpha^{*}$.
\end{lemma}
\begin{exercise} Let
$\alpha,\,\beta:{\mathbb R}^{n}\rightarrow{\mathbb R}^{n}$
be linear.
(i) $(\alpha\beta)^{*}=\beta^{*}\alpha^{*}$.
(ii) $\alpha^{**}=\alpha$.
\end{exercise}
\begin{theorem}\label{03} Let
$\alpha:{\mathbb R}^{n}\rightarrow{\mathbb R}^{n}$
be linear.
The following statements are equivalent.
(i) $||\alpha {\mathbf x}||=||{\mathbf x}||$ for all
${\mathbf x}\in{\mathbb R}^{n}$.
(ii) $\langle\alpha{\mathbf x},\alpha{\mathbf y}\rangle
=\langle{\mathbf x},{\mathbf y}\rangle$ for all
${\mathbf x},{\mathbf y}\in{\mathbb R}^{n}$.
(iii) $\alpha^{*}\alpha=\iota$.
(iv) If $\alpha$ has matrix $A$ with respect to some
orthonormal basis then \mbox{$A^{T}A=I$}.
\end{theorem}
If, as I shall tend to do, we think of the linear maps
as central, we refer to the collection of distance preserving
linear maps by the name $O({\mathbb R}^{n})$. If we think of the
matrices as central, we refer to the collection of real
$n\times n$ matrices $A$ with $AA^{T}=I$ by the name $O({\mathbb R}^{n})$.
In practice most people use whichever convention is
most convenient at the time and no confusion results.
A real $n\times n$ matrix $A$ with $AA^{T}=I$ is called
an orthogonal matrix.
We recall that the determinant of a square matrix can
be evaluated by row or by column expansion and so
\[\det A^{T}=\det A.\]
\begin{lemma}\label{04} If $A$ is an orthogonal matrix, then
$\det A=1$ or $\det A=-1$.
\end{lemma}
If we think in terms of linear maps, we define
\[SO({\mathbb R}^{n})=\{\alpha\in O({\mathbb R}^{n}):\det\alpha=1\}.\]
If we think in terms of matrices, we define
\[SO({\mathbb R}^{n})=\{A\in O({\mathbb R}^{n}):\det A=1\}.\]
(The letter $O$ stands for `orthogonal', the letters
$SO$ for `special orthogonal'.)
In the rest of this section we shall look at other ways of
characterising $O({\mathbb R}^{n})$ and $SO({\mathbb R}^{n})$.
We shall think in terms of linear maps. We shall use an approach
which, I am told, goes back to Euler.
\begin{definition} If ${\mathbf n}$ is a vector of norm $1$,
the map $R:{\mathbb R}^{n}\rightarrow{\mathbb R}^{n}$
given by
\[R({\mathbf x})={\mathbf x}-
2\langle{\mathbf x},{\mathbf n}\rangle{\mathbf n}\]
is said to be a reflection in
\[\pi=\{\mathbf x:\langle{\mathbf x},{\mathbf n}\rangle=0\}.\]
\end{definition}
\begin{lemma} With the notation of the definition just given:
(i) There is an orthonormal basis
${\mathbf e}_{1}$, ${\mathbf e}_{2}$,
\dots, ${\mathbf e}_{n}$ with respect to which $R$
has a diagonal matrix $D$ with $d_{11}=-1$, $d_{ii}=1$
for all $2\leq i\leq n$.
(ii) $R^{2}=\iota$.
(iii) $R\in O({\mathbb R}^{n})$.
(iv) $\det R=-1$.
(v) If $||{\mathbf x}||=||{\mathbf y}||$
and ${\mathbf x}\neq{\mathbf y}$, then we can find a
unit vector ${\mathbf n}$ such that $R{\mathbf x}={\mathbf y}$.
Moreover, we can choose $R$ in such a way that, whenever
${\mathbf u}$ is perpendicular to ${\mathbf x}$ and ${\mathbf y}$,
we have $R{\mathbf u}={\mathbf u}$.
\end{lemma}
\begin{lemma} If $\alpha\in O({\mathbb R}^{n})$, then $\alpha$
is the product of $m$ reflections with $0\leq m\leq n$.
(If $m=0$, $\alpha=\iota$. Otherwise, we can find reflections
$R_{1}$, $R_{2}$, \dots, $R_{m}$ such that
$\alpha=R_{1}R_{2}\dots R_{m}$.) If $m$ is even,
$\alpha\in SO({\mathbb R}^{n})$. If $m$ is odd,
$\alpha\notin SO({\mathbb R}^{n})$.
\end{lemma}
\begin{lemma} If $\alpha\in O({\mathbb R}^{2})$ then one
of two things must happen.
(i) $\alpha\in SO({\mathbb R}^{2})$ and we can find
$0\leq\theta<2\pi$ such that, with respect to any
orthonormal basis, $\alpha$ has one of the two
possible matrices
\[\begin{pmatrix}\cos\theta&-\sin\theta\\ \sin\theta&\cos\theta
\end{pmatrix}
\ \text{or}
\ \begin{pmatrix}\cos\theta&\sin\theta\\-\sin\theta&\cos\theta
\end{pmatrix}.\]
(ii) $\alpha\notin SO({\mathbb R}^{2})$ and we can find
an orthonormal basis with respect to which $\alpha$ has matrix
\[\begin{pmatrix}-1&0\\0&1\end{pmatrix}.\]
\end{lemma}
To see why we have to allow two forms in part~(i), consider
an orthonormal basis ${\mathbf e}_{1}$, ${\mathbf e}_{2}$
and the related orthonormal basis ${\mathbf e}_{1}$,
$-{\mathbf e}_{2}$.
\begin{exercise} By considering the product of the rotation matrices
\[\begin{pmatrix}\cos\theta&-\sin\theta\\ \sin\theta&\cos\theta
\end{pmatrix}
\ \text{and}
\ \begin{pmatrix}\cos\phi&-\sin\phi\\ \sin\phi&\cos\phi
\end{pmatrix}\]
we can recover the addition formulae for $\cos$ and $\sin$.
\end{exercise}
\begin{lemma}\label{rotation}
(i) If $\alpha\in O({\mathbb R}^{3})$ then
we can find
an orthonormal basis with respect to which $\alpha$ has matrix
\[\begin{pmatrix}\pm 1&0&0\\
0&\cos\theta&-\sin\theta\\0&\sin\theta&\cos\theta
\end{pmatrix}.\]
(ii) If the plus sign is taken in~(i), $\alpha\in SO({\mathbb R}^{3})$.
If the minus sign is taken, $\alpha\notin SO({\mathbb R}^{3})$.
\end{lemma}
The traditional way of stating that part of Lemma~\ref{rotation}
which deals with $SO({\mathbb R}^{3})$ is to say that
every rotation has an axis. (Things are more complicated in higher
dimensions, but we do not need to go further in this course.
If you are interested, look at Exercise~\ref{Euler onwards}.)
It may be worth stating some of our
earlier results in the form in which they will
be used in the discussion of Cartesian tensors in
next year's mathematical methods course.
\begin{lemma} If the matrix $L\in O({\mathbb R}^{3})$, then,
using the summation convention,
\[l_{ik}l_{jk}=\delta_{ij}.\]
Further,
\[\epsilon_{ijk}l_{ir}l_{js}l_{kt}=\pm\epsilon_{rst}\]
with the positive sign if $L\in SO({\mathbb R}^{3})$
and the negative sign otherwise.
\end{lemma}
We also make the following remark.
\begin{lemma} An $n\times n$ real matrix $L$
is orthogonal if and only if its columns are orthonormal
column vectors.
An $n\times n$ real matrix $L$
is orthogonal if and only if its rows are orthonormal
row vectors.
\end{lemma}
\section{Real symmetric matrices} We say that a real
$n\times n$ matrix $A$ is symmetric if $A^{T}=A$.
In this section we deal with the diagonalisation
of such matrices. It is not immediately clear why
this is important but in the next couple of years the
reader will come across the topic in many contexts.
(1) The study of Sturm--Liouville differential equations
in the methods course next year runs in parallel with what we do here.
(2) The study of symmetric tensors in the methods course
next year will quote
our results.
(3) The t-test, F-test and so on in the statistics course next year
depend on the diagonalisation of a symmetric matrix.
(4) The study of small oscillations about equilibrium depends
on a generalisation of our ideas.
(5) The standard formalism for Quantum Mechanics and
the spectral theorem in functional analysis are both deep
generalisations of what we do here.
\noindent For the moment we note that the covariance
matrix $({\mathbb E}X_{i}X_{j})$ of $n$ random variables
and the Hessian matrix
\[\left(\frac{\partial^{2}f}{\partial x_{i}\partial x_{j}}
\right)\]
of the second partial derivatives of a well behaved function
$f$ of $n$ variables are both symmetric matrices. (If one or other
or both these matrices make no sense to you, all will become
clear next term in the probability and vector calculus courses.)
\begin{lemma}\label{K1}
Let $\alpha:{\mathbb R}^{n}\rightarrow{\mathbb R}^{n}$
be linear.
The following statements are equivalent.
(i) $\langle\alpha{\mathbf x},{\mathbf y}\rangle
=\langle{\mathbf x},\alpha{\mathbf y}\rangle$ for all
${\mathbf x},{\mathbf y}\in{\mathbb R}^{n}$.
(ii) If $\alpha$ has matrix $A$ with respect to some
orthonormal basis, then $A$ is symmetric.
\end{lemma}
Naturally we call an $\alpha$, having the properties just described,
symmetric. We can also call $\alpha$ a `real self-adjoint' map.
\begin{theorem}\label{Real}
If $\alpha:{\mathbb R}^{n}\rightarrow{\mathbb R}^{n}$
is symmetric, then all the roots of the characteristic polynomial
$\det(t\iota-\alpha)$ are real.
\end{theorem}
\begin{theorem}\label{Perpendicular}
If $\alpha:{\mathbb R}^{n}\rightarrow{\mathbb R}^{n}$
is symmetric, then eigenvectors corresponding to distinct
eigenvalues are orthogonal.
\end{theorem}
\begin{theorem} (i) If $\alpha:{\mathbb R}^{n}\rightarrow{\mathbb R}^{n}$
is symmetric and all the roots of the characteristic polynomial
$\det(t\iota-\alpha)$ are distinct, then there exists an orthonormal
basis of eigenvectors of $\alpha$.
(ii) If $A$ is a symmetric $n\times n$ matrix
and all the roots of the characteristic polynomial
$\det(tI-A)$ are distinct, then there exists a
matrix $P\in SO({\mathbb R}^{n})$ such that $P^{T}AP$ is diagonal.
\end{theorem}
Much more is true.
\begin{fact}\label{Hermite}
(i) If $\alpha:{\mathbb R}^{n}\rightarrow{\mathbb R}^{n}$
is symmetric, then there exists an orthonormal
basis of eigenvectors of ${\mathbb R}^{n}$ with respect
to which $\alpha$ is diagonal.
(ii) If $A$ is a symmetric $n\times n$ matrix, then there exists a
matrix $P\in SO({\mathbb R}^{n})$ such that $P^{T}AP$ is diagonal.
\end{fact}
I may sketch a proof for the case $n=3$ but it will not be examinable.
The general case will be proved with more sophisticated
techniques in next year's linear algebra
course. We note the easy converse results.
\begin{lemma}\label{L;symmmetric converse}
(i) If $\alpha:{\mathbb R}^{n}\rightarrow{\mathbb R}^{n}$
has an orthonormal
basis of eigenvectors, then $\alpha$ is symmetric.
(ii) If $A$ is an $n\times n$ real matrix and there exists a
matrix $P\in SO({\mathbb R}^{n})$ such that $P^{T}AP$ is diagonal,
then $A$ is symmetric.
\end{lemma}
It is important to think about the various conditions on our results.
\begin{exercise}\label{E;non symmetric converse} Let
\[A=\left(\begin{matrix}2&0\\0&1\end{matrix}\right)
\ \text{and}
\ P=\left(\begin{matrix}1&0\\1&1\end{matrix}\right).\]
Compute $PAP^{-1}$ and observe that it is not a symmetric
matrix, although $A$ is. Why does this not contradict
Lemma~\ref{L;symmmetric converse}?
\end{exercise}
Moving from theory to practice, we see that the diagonalisation
of an $n\times n$ symmetric matrix $A$
(using an orthogonal matrix) follows the same pattern
as ordinary diagonalisation (using an invertible matrix).
The first step is to look at the
roots of the characteristic polynomial
\[P(t)=\det(tI-A).\]
By Theorem~\ref{Real} we know that all the roots are real.
If we can find
the $n$ roots (in examinations, $n$ will usually be 2 or 3
and the resulting quadratics and cubics will have nice roots)
$\lambda_{1}$, $\lambda_{2}$, \dots, $\lambda_{n}$
and the roots are distinct then we know, without further
calculation, that there exists an orthogonal matrix $P$ with
\[P^{T}AP=D,\]
where $D$ is the diagonal matrix with diagonal entries
$d_{ii}=\lambda_{i}$.
For each $\lambda_{j}$ we know that the system of $n$
linear equations in $n$ unknowns given by
\[(A-\lambda_{j}I){\mathbf x}={\mathbf 0}\]
(with ${\mathbf x}$ a column vector of length $n$)
has non-zero solutions. Let ${\mathbf u}_{j}$ be one of them
so that
\[A{\mathbf u}_{j}=\lambda_{j}{\mathbf u}_{j}.\]
We normalise by setting
\[{\mathbf e}_{j}=||{\mathbf u}_{j}||^{-1}{\mathbf u}_{j}\]
and, unless we are unusually confident of our arithmetic,
check that, as Theorem~\ref{Perpendicular} predicts,
\[\langle{\mathbf e}_{i},{\mathbf e}_{j}\rangle=\delta_{ij}.\]
If $P$ is the $n\times n$ matrix with $j$th column ${\mathbf e}_{j}$
then, from the formula just given,
$P$ is orthogonal (i.e., $PP^{T}=I$ and so $P^{-1}=P^{T}$).
We note that,
if we write ${\mathbf v}_{j}$ for the unit vector
with $1$ in the $j$th place, $0$ elsewhere, then
\[P^{T}AP{\mathbf v}_{j}=P^{-1}A{\mathbf e}_{j}
=\lambda_{j}P^{-1}{\mathbf e}_{j}=\lambda_{j}{\mathbf v}_{j}
=D{\mathbf v}_{j}\]
for all $1\leq j\leq n$ and so
\[P^{T}AP=D.\]
Our construction gives $P\in O({\mathbb R}^{n})$
but does not guarantee that $P\in SO({\mathbb R}^{n})$. If
$\det P=1$, then $P\in SO({\mathbb R}^{n})$. If $\det P=-1$,
then replacing ${\mathbf e}_{1}$ by $-{\mathbf e}_{1}$ gives a new $P$
in $SO({\mathbb R}^{n})$.
The strict logic of syllabus construction implies that the
reader should not be asked to diagonalise a symmetric
matrix when the characteristic equation has repeated
roots until she has done next year's linear algebra course.
Unfortunately nature
is not very obliging and symmetric matrices which
appear in physics often have repeated roots.
If $A$ is a symmetric $3\times 3$ matrix we proceed as follows.
If the characteristic polynomial $P$ has a three times repeated root
$\lambda$ (i.e., $P(t)=(t-\lambda)^{3}$) then (since
$A$ is symmetric, so $A=P^{T}(\lambda I)P=\lambda I$
for some orthogonal $P$) we have $A=\lambda I$ and there is no problem.
If $P$ has a single root $\mu$ and a double root
$\lambda$ (i.e., $P(t)=(t-\mu)(t-\lambda)^{2}$) then,
as before, we can find ${\mathbf e}_{1}$ a column vector
of Euclidean length 1
with $A{\mathbf e}_{1}=\mu {\mathbf e}_{1}$.
On the other hand, it will turn out that we can find
two orthonormal vectors ${\mathbf e}_{2}$, ${\mathbf e}_{3}$
such that
\[A{\mathbf e}_{2}=\lambda {\mathbf e}_{2},
\ A{\mathbf e}_{3}=\lambda {\mathbf e}_{3}.\]
If we take $P$ to be the $3\times 3$ matrix with $j$th
column ${\mathbf e}_{j}$, then $P$ is orthogonal and
\[P^{T}AP=\begin{pmatrix}\mu&0&0\\0&\lambda&0\\0&0&\lambda
\end{pmatrix}.\]
\begin{example}\label{Repeated}
We shall diagonalise
\[\begin{pmatrix}1&1&0\\1&0&1\\0&1&1\end{pmatrix}
\ \mbox{and}\ \begin{pmatrix}1&0&0\\0&0&1\\0&1&0\end{pmatrix}.\]
\end{example}
As I said earlier, most of the applications of the results of
this section will occur in later courses but we can give one
important one immediately. Let ${\mathbf u}_{1}$, ${\mathbf u}_{2}$,
\dots, ${\mathbf u}_{n}$ be the standard orthonormal basis
of column vectors
for ${\mathbb R}^{n}$.
Consider a `quadratic form'
$Q:{\mathbb R}^{n}\rightarrow{\mathbb R}$ given by
\[Q({\mathbf x})=\sum_{i=1}^{n}\sum_{j=1}^{n}a_{ij}x_{i}x_{j},\]
where ${\mathbf x}=\sum_{i=1}^{n}x_{i}{\mathbf u}_{i}$.
It is clear that there is no loss in generality in taking
$a_{ij}=a_{ji}$. We then have
\[Q({\mathbf x})={\mathbf x}^{T}A{\mathbf x}\]
with $A$ the symmetric matrix $(a_{ij})$.
We know that there exists a special orthogonal matrix $P$
such that
\[P^{T}AP=D\]
with $D$ diagonal. In particular,
setting ${\mathbf e}_{j}=P{\mathbf u}_{j}$, we see
that the ${\mathbf e}_{j}$ form an orthonormal basis
for ${\mathbb R}^{n}$ such that, if
${\mathbf y}=\sum_{i=1}^{n}y_{i}{\mathbf e}_{i}$, then
\[Q({\mathbf y})=\sum_{i=1}^{n}d_{ii}y_{i}^{2}.\]
\begin{example} Suppose that
$f:{\mathbb R}^{2}\rightarrow{\mathbb R}$
is given by
\[f({\mathbf x})=ax^{2}+2bxy+cy^{2}\]
with respect to some orthogonal axes $Oxy$. Then we can find
orthogonal axes $OXY$ with respect to which
\[f({\mathbf x})=AX^{2}+CY^{2},\]
for some $A$ and $C$. We observe that
(i) If $A,C>0$, $f$ has a minimum at ${\mathbf 0}$.
(ii) If $A,C<0$, $f$ has a maximum at ${\mathbf 0}$.
(iii) If $A$ and $C$ have opposite signs, $f$ has a saddle point
at ${\mathbf 0}$.
\end{example}
% NOTE(review): text appears to be missing from the source at this
% point. The following sentence reconstructs the surviving fragment.
It may be shown that every similarity $S$ of ${\mathbb R}^{n}$ can be
written as the composition of a dilation $D_{\lambda}$ (with $\lambda>0$),
a translation $T_{\mathbf a}$
and $R\in O({\mathbb R}^{n})$ such that
\[S=D_{\lambda}T_{\mathbf a}R.\]
The great German mathematician Klein suggested that geometry
was the study of those properties of ${\mathbb R}^{n}$
which are invariant
under the actions of a particular subgroup of $S({\mathbb R}^{n})$.
Thus `Euclidean Geometry' is the study of the
properties of ${\mathbb R}^{n}$ invariant
under the actions of the Euclidean group. A particularly
interesting example occurs when we consider the collection $G$
of $f\in S({\mathbb R}^{n})$ such that $f$ and $f^{-1}$
are continuous. (The reader may easily check that $G$
is in fact a group.) The study of the
properties of ${\mathbb R}^{n}$ invariant
under the actions of $G$ is now called topology.
These ideas will be taken up again in Part~II.
Continuing the geometric theme, we define the so-called
symmetry groups.
\begin{lemma} Let $X$ be a set of points in
${\mathbb R}^{n}$. The collection $G$ of $\sigma\in S(X)$
such that
\[||\sigma({\mathbf x})-\sigma({\mathbf y})||=
||{\mathbf x}-{\mathbf y}||\]
for all ${\mathbf x},{\mathbf y}\in X$ is a subgroup
of $S(X)$.
\end{lemma}
We call $G$ the symmetry group of $X$. If we pick a random
collection of points then, in general, $G$ will consist
of the single element $\iota$. However, if $X$ consists
of the vertices of a regular
polygon or solid, $G$ becomes more interesting. The syllabus
used to state
rather grandly that the lecturer should talk about
`symmetry groups of regular polygons
and solids'. Since Klein devoted an entire book to the study
of the symmetry group of the regular
icosahedron\footnote{There is a remarkable Open University
TV programme on the same subject.} this was a rather tall order
and lecturers made only the feeblest attempt to carry it out.
In Example~\ref{cube} we shall look at the symmetry group
of the regular tetrahedron
(this is very easy, can you see why?) and the symmetry group of
the cube.
There will be a more detailed study of the symmetry
groups of the regular solids in a later geometry course.
For the moment we look at the symmetry group
of the regular polygons in the plane.
\begin{lemma} If $n\geq 3$, the symmetry group $D_{n}$
of the regular $n$-gon in ${\mathbb R}^{2}$ has
$2n$ elements. If $\alpha$ is a rotation about the centre
of the $n$-gon of $2\pi/n$ and $\beta$ is a reflection about
some axis of symmetry, then the distinct elements
of $D_{n}$ are
\[\iota,\ \alpha,\ \alpha^{2},\ \dots,\ \alpha^{n-1},
\beta,\ \alpha\beta,\ \alpha^{2}\beta,\ \dots,\ \alpha^{n-1}\beta.\]
Further
\[\alpha^{n}=\iota,\ \beta^{2}=\iota,\ \beta\alpha=\alpha^{-1}\beta.\]
\end{lemma}
(Since the mathematical literature is confused about whether to write
$D_{n}$ or $D_{2n}$ for the group just described, you should
always make it clear that you mean the symmetry group
of the regular $n$-gon.)
Students are liable to panic when faced with so many different
groups. They should note that the syllabus gives them as
\emph{examples} and that, though there is nothing to prevent
a rogue examiner suddenly asking for the definition of
some named group, a glance through previous examination
papers shows that, in practice, examiners give definitions
of all but the commonest groups.
\section{Abstract groups and isomorphism}
Traditional treatments of group theory begin not
with concrete but with abstract groups.
\begin{definition} We say that $(G,*)$ is an (abstract)
group if $G$ is a set and $*$ an operation such that
(i) If $a,b\in G$, then $a*b\in G$. \emph{(Closure)}
(ii) If $a,b,c\in G$, then $(a*b)*c=a*(b*c)$. \emph{(Associativity)}
(iii) There exists an $e\in G$ such that $e*a=a*e=a$ for
all $a\in G$. \emph{(Unit)}
(iv) If $a\in G$, then there exists an element $a^{-1}\in G$
with $a^{-1}*a=a*a^{-1}=e$. \emph{(Inverse)}
\end{definition}
The associativity rule means that we can bracket expressions
any way we like. Ordinary subtraction is not associative
since $(3-4)-5=-6\neq 4=3-(4-5)$ and so $({\mathbb Z},-)$
is not a group.
It will be helpful to get the following results out of the way once
and for all.
\begin{lemma} (i) The unit of a group $(G,*)$ is unique,
i.e., if $e*a=a*e=a$ and $e'*a=a*e'=a$ for all $a\in G$,
then $e=e'$.
(ii) The inverse of an element in a group $(G,*)$ is unique,
i.e., if $b*a=a*b=e$ and $c*a=a*c=e$, then $b=c$.
\end{lemma}
\begin{exercise} If $(G,*)$ is a group and $a,\,b\in G$
then $(ab)^{-1}=b^{-1}a^{-1}$.
\end{exercise}
\begin{definition} If $(G,*)$ is a group and $H\subseteq G$
we say that $(H,*)$ is a subgroup of $(G,*)$ if
(i) $e\in H$,
(ii) $x\in H$ implies $x^{-1}\in H$,
(iii) $x,y\in H$ implies $xy\in H$.
\end{definition}
(Exercise~\ref{Left right} embroiders this a little bit further.)
\begin{lemma} If $(H,*)$ is a subgroup of $(G,*)$ then
$(H,*)$ is a group.
\end{lemma}
Clearly, any concrete group is an abstract group so we
already have quite a collection of examples.
Here are some more.
\begin{example}
(i) $({\mathbb Z},+)$ is a group.
(ii) $({\mathbb R}^{n},+)$ with vector addition is a group.
(iii) If we take $C_{n}=\{0,1,2,\dots,n-1\}$ and take addition
modulo $n$, then $(C_{n},+)$ is a group (called the cyclic
group of order $n$).
(iv) If $(G,.)$ and $(H,.)$ are groups, then we may
define a new group $(G\times H,.)$ by
\[(g_{1},h_{1}).(g_{2},h_{2})=(g_{1}.g_{2},h_{1}.h_{2}).\]
\end{example}
From now on we shall often refer to a group $G$ rather than a
group $(G,*)$. We shall usually write $ab=a*b$.
The example of matrix groups like $GL({\mathbb R}^{n})$ for $n\geq 2$
shows us that we cannot assume automatically that $a*b=b*a$.
\begin{definition} We say that $G$ is a commutative
(or an Abelian, or an abelian) group if
\[g*h=h*g\]
for all $g,h\in G$.
\end{definition}
\begin{example} (i) $({\mathbb Z},+)$, $({\mathbb R}^{n},+)$,
$(C_{n},+)$ and $SO({\mathbb R}^{2})$ are commutative.
(ii) $D_{n}$ the group of symmetries of the regular
$n$-gon $[n\geq 3]$, $O({\mathbb R}^{2})$ and $SO({\mathbb R}^{3})$
are non-commutative.
(iii) If $G$ and $H$ are commutative groups then $G\times H$
is commutative.
\end{example}
We can already see the possibility that what is essentially the
same group may turn up under different disguises. We
deal with this by introducing the notion of isomorphism.
\begin{definition} If $(G,*)$ and $(H,.)$ are groups
and $f:G\rightarrow H$ is a bijection such that
\[f(x*y)=f(x).f(y)\]
for all $x$, $y\in G$ then we say that $f$ is an
isomorphism and that $(G,*)$ and $(H,.)$ are isomorphic.
\end{definition}
The notion of isomorphism is closely linked with that
of homomorphism (or `group morphism').
\begin{definition} If $(G,*)$ and $(H,.)$ are groups
and $f:G\rightarrow H$ is a map such that
\[f(x*y)=f(x).f(y)\]
for all $x,\,y\in G$, then we say that $f$ is a
homomorphism.
\end{definition}
The following result is trivial but worth noting.
\begin{lemma} Let $f:G\rightarrow H$ be a homomorphism.
(i) If $G$ has unit $e_{G}$ and $H$ unit $e_{H}$,
then $f(e_{G})=e_{H}$.
(ii) If $x\in G$, then $f(x)^{-1}=f(x^{-1})$.
\end{lemma}
Normally we write $e$ for both $e_{G}$ and $e_{H}$.
Any reader who finds this confusing is free to continue
using the unambiguous notation $e_{G}$ and $e_{H}$.
We shall talk a bit about homomorphism later but, for the
moment, we concentrate on isomorphism.
Those members of my audience who are doing the Numbers and Sets
course should
note the following remark (the others may ignore it).
\begin{lemma} Let us write $G\equiv H$ if $G$ and $H$
are isomorphic. Then, if $G$, $H$ and $K$ are groups,
(i) $G\equiv G$.
(ii) $G\equiv H$ implies $H\equiv G$.
(iii) If $G\equiv H$ and $H\equiv K$ then $G\equiv K$.
\noindent Thus, isomorphism is an equivalence relation.
\end{lemma}
\begin{example} (i) If $G$ and $H$ are groups, then
$G\times H$ is isomorphic to $H\times G$.
(ii) $C_{2}\times C_{2}$ is not isomorphic to $C_{4}$.
(iii) $C_{2}\times C_{3}$ is isomorphic to $C_{6}$.
(iv) The set ${\mathbb R}\setminus\{0\}$ is a group under (ordinary)
multiplication which is not isomorphic to $({\mathbb R},+)$.
(v) The set $\{x\in{\mathbb R}:x>0\}$ is a group under (ordinary)
multiplication which is isomorphic to $({\mathbb R},+)$.
(vi) $S_{n}=S(\{1,2,\dots,n\})$ is isomorphic to $S(X)$
if and only if $X$ has $n$ elements.
(vii) $S_{3}$ and $D_{3}$ are isomorphic.
\end{example}
In general, to show two groups isomorphic, we look
for a `natural' map between them. To show they are
not isomorphic, we look for a `group property' possessed by one
but not the other.
\begin{example}\label{cube}
(i) The symmetry group of the regular tetrahedron
is isomorphic to $S_{4}$.
(ii) The symmetry group of the cube has 48 elements. The subgroup
consisting of rotations alone is isomorphic to $S_{4}$.
The symmetry group of the cube is isomorphic to
$S_{4}\times C_{2}$.
(iii) The symmetry group of the regular octahedron is isomorphic
to the symmetry group of the cube.
\end{example}
The proof of the next result is simple but faintly
Zen. (You may be relieved to note that the proof is not
in the syllabus.)
\begin{fact}{\bf (Cayley's Theorem.)}
Every abstract group $G$ is isomorphic to a
concrete group (more specifically to a subgroup of $S(G)$).
\end{fact}
Thus the study of abstract and concrete groups comes to the
same thing in the end.
If we think in terms of abstract groups rather than
concrete groups, we have to restate
what it means for a group $G$ to act faithfully on a set $X$.
\begin{definition}\label{Action 2} Suppose $G$ is a group
and $X$ a non-empty set.
If there exists a map $\theta:G\times X\rightarrow X$ such that,
writing $gx=\theta(g,x)$, we have
(i) $g(hx)=(gh)x$ for all $g,\,h\in G$ and $x\in X$,
(ii) $ex=x$ for all $x\in X$,
\noindent we say that $\theta$ is an action of $G$ on $X$,
or, more informally that $G$ acts on $X$.
\end{definition}
\begin{example} (i) If
$\theta:{\mathbb Z}\times {\mathbb R}\rightarrow {\mathbb R}$
is given by $\theta(n,x)=n+x$ then $\theta$
is an action of $({\mathbb Z},+)$ on ${\mathbb R}$.
(ii) If
$\phi:{\mathbb Z}\times {\mathbb R}\rightarrow {\mathbb R}$
is given by $\phi(n,x)=2n+x$ then $\phi$
is an action of $({\mathbb Z},+)$ on ${\mathbb R}$.
\end{example}
\begin{exercise} If $G$ acts on $X$ show that, if $g\in G$,
the map $x\mapsto gx$ is a bijection.
\end{exercise}
\begin{definition}\label{Action 3} Suppose $G$ is a group
acting on a non-empty set $X$. We say that $G$ acts
faithfully on $X$ if the only element $g$ of $G$
with the property $gx=x$ for all $x\in X$
is $e$ itself.
\end{definition}
More compactly `$G$ acts
faithfully on $X$ if and only if the relation $gx=x$ for all $x\in X$
implies $g=e$'.
\begin{exercise} Give an example of a group $G$
acting on a set $X$ which does not act faithfully.
\end{exercise}
The syllabus only demands that you know Definition~\ref{Action 2}.
Example~\ref{quotient action} sheds some more light on what is going on.
It should be noted that, for any but the smallest groups,
checking the associative law on a case by case basis
is essentially impossible. Thus the usual way to show
that something is a group is to show that it is a subgroup
of some other group and often this means showing that it
is (isomorphic to) a concrete group. More generally the
easiest way to study a particular group is often via
some isomorphic concrete group. On the other hand, the
general properties common to many groups are frequently
best approached by using abstract group theory.
We end with a simple but genuine theorem.
\begin{definition} We say that $G$ is cyclic if there exists
an $a\in G$ such that every element of $G$ has the form $a^{r}$
for some integer $r$.
\end{definition}
\begin{theorem} Every cyclic group is isomorphic to $({\mathbb Z},+)$
or to $C_{n}$ for some $n\geq 1$.
\end{theorem}
\section{Orbits and suchlike} We now return to groups
as objects that act on sets.
\begin{definition} Suppose $G$ is a group acting on a set $X$.
(i) If $x\in X$, the orbit $\Orb(x)$ of $x$ under $G$ is defined by
\[\Orb(x)=\{gx:g\in G\}.\]
(ii) If $x\in X$, the stabiliser $\Stab(x)$ of $x$ is defined by
\[\Stab(x)=\{g:gx=x\}.\]
\end{definition}
We use the following notation in the next lemma and
elsewhere.
\begin{definition}
If $F$ is a finite set we write $|F|$ for the number of elements
of $F$.
\end{definition}
\begin{lemma}\label{orbit} Suppose $G$ is a group acting on a set $X$.
(i) $\bigcup_{x\in X}\Orb(x)=X$.
(ii) If $x,y\in X$, then either $\Orb(x)\cap\Orb(y)=\emptyset$
or $\Orb(x)=\Orb(y)$.
(iii) If $X$ is finite and the distinct orbits under $G$ are
$\Orb(x_{1})$, $\Orb(x_{2})$, \dots, $\Orb(x_{m})$, then
\[|X|=|\Orb(x_{1})|+|\Orb(x_{2})|+ \dots +|\Orb(x_{m})|.\]
\end{lemma}
Those students doing the Numbers and Sets
course will recognise that
Lemma~\ref{orbit} could be proved by showing that
the relation
\[x\equiv y\ \mbox{if}\ y\in\Orb(x)\]
is an equivalence relation, but I shall prove it directly.
\begin{lemma} If $G$ is a group acting on a set $X$ and $x\in X$
then $\Stab(x)$ is a subgroup of $G$.
\end{lemma}
\begin{example} Consider the group $SO({\mathbb R}^{3})$
acting on ${\mathbb R}^{3}$. If ${\mathbf x}\in{\mathbb R}^{3}$,
then
\[\Orb({\mathbf x})=\{{\mathbf y}\in{\mathbb R}^{3}:
||{\mathbf y}||=||{\mathbf x}||\},\]
the sphere with radius $||{\mathbf x}||$
and centre ${\mathbf 0}$.
If ${\mathbf x}\neq {\mathbf 0}$, then the stabiliser of
${\mathbf x}$ is the subgroup of rotations about an axis
through ${\mathbf x}$ and ${\mathbf 0}$. The stabiliser
of ${\mathbf 0}$ is the full group $SO({\mathbb R}^{3})$.
\end{example}
If $G$ is a group and $H$ is a subgroup of $G$, then
$H$ acts on $G$ by the map $(h,g)\mapsto hg$. The orbit
of an $x\in G$ is given a special name.
\begin{definition} If $H$ is a subgroup of a group $G$ and $x\in G$,
we write
\[Hx=\{hx:h\in H\}\]
and call $Hx$ a right coset of $H$. The left coset $xH$
is defined similarly\footnote{The reader is warned that
some mathematicians reverse this convention and call $xH$
a left coset.}.
\end{definition}
\begin{example} Consider $D_{3}$ the group of symmetries
of an equilateral triangle. If $H$ is a subgroup $\{\iota,\rho\}$
with $\rho$ a reflection and $\sigma$ is a non-trivial rotation,
then $H\sigma\neq\sigma H$.
\end{example}
We constantly make use of simple remarks of
the type illustrated in the next lemma.
\begin{lemma} If $H$ is a subgroup of a group $G$
and $a,b\in G$, then the following three statements are
equivalent.
(i) $aH=bH$,
(ii) $b^{-1}a\in H$,
(iii) $b^{-1}aH=H$.
\end{lemma}
Expressions of the type $PAP^{-1}$ occur throughout mathematics.
In the context of group theory we talk of conjugation.
\begin{definition}\label{D;conjugate groups}
(i) If $G$ is a group and $x,y\in G$,
we say that $x$ and $y$ are conjugate if there exists
an $a\in G$ such that
\[x=aya^{-1}.\]
(ii) If $G$ is a group and $H$ and $K$ are subgroups,
we say that $H$ and $K$ are conjugate if there exists
an $a\in G$ such that
\[H=aKa^{-1}\]
or, more formally,
\[H=\{aka^{-1}:k\in K\}.\]
\end{definition}
Definition~\ref{D;conjugate groups}~(ii) is supplemented
by the following easy observation.
\begin{lemma} If $G$ is a group, $a\in G$ and $K$ a subgroup,
then $aKa^{-1}$ is a subgroup.
\end{lemma}
The following remarks are easy but useful.
\begin{lemma} (i) If we consider conjugacy of elements of a group $G$
and write $x\equiv y$ whenever $x$ and $y$
are conjugate then
(a) $x\equiv x$,
(b) $x\equiv y$ implies $y\equiv x$,
(c) If $x\equiv y$ and $y\equiv z$, then $x\equiv z$.
(ii) Similar results hold if we consider conjugacy of
subgroups of a group~$G$.
\end{lemma}
\begin{example} If $G$ is a group acting on a set $X$
then points in the same orbit have conjugate stabilisers.
\end{example}
\section{Lagrange's theorem}
Algebraists call the number of
elements in a finite group $G$ the order of $G$.
The following theorem can be
proved using the language of orbits but, in view
of its importance, I shall give a self contained proof.
\begin{theorem}{\bf (Lagrange's Theorem.)} If $G$ is a finite group
and $H$ a subgroup, then the order of $H$ divides the order
of~$G$.
\end{theorem}
The proof of this theorem is book-work and frequently asked
for in exams. Example~\ref{Aunty} gives an example of a group
of order 12 with no subgroup of order 6.
The next result (the `orbit-stabiliser theorem') is also
an old examination favourite.
\begin{theorem} Suppose that $G$ is a group acting on a set
$X$ and that $x\in X$.
(i) There is a bijection between $\Orb(x)$ and the left cosets
of $\Stab(x)$.
(ii) If $G$ is finite, then $|\Orb(x)|=|G|/|\Stab(x)|$.
(iii) If $G$ is finite, then the number of elements in $\Orb(x)$
divides the order of $G$.
\end{theorem}
\begin{exercise} (i) Verify the orbit-stabiliser theorem
for the full group of isometries of the cube.
(ii) Let $X$ be a regular $6$-gon with centre $O$ and one vertex $A$.
Consider the group $G$ of symmetries of $X$
generated by rotation through $2\pi/3$
about $O$ and reflection in the line $OA$.
Verify the orbit-stabiliser theorem for $G$ acting on $X$.
\end{exercise}
\begin{definition} If $a\in G$ a group, then
(i) If $a=e$, we say $a$ has order $1$.
(ii) If $a\neq e$ and there exists an $r\neq 0$ such that
$a^{r}=e$, we say that $a$ has order
\[\min\{r>0:a^{r}=e\}.\]
(iii) If the equation $a^{r}=e$ has no solution with $r\neq 0$,
we say that $a$ has infinite order.
\end{definition}
\begin{theorem} If $G$ is a finite group then the
order of any $a\in G$ divides the order of $G$.
In particular, $a^{|G|}=e$.
\end{theorem}
Note that $C_{2}\times C_{2}\times C_{2}$ has order 8
but contains no elements of order 4.
\begin{lemma} If $p$ is a prime, all groups of order $p$
are isomorphic to $C_{p}$.
\end{lemma}
Anyone who believes that the study of
finite commutative groups is trivial should test their belief
against the next collection of groups.
\begin{definition} We define $(R_{n},.)$ to be the set
of integers $r$ with $1\leq r\leq n$ and $r$ coprime to
$n$ with multiplication modulo $n$. We write $\phi(n)=|R_{n}|$.
(We call $\phi$ Euler's totient function.)
\end{definition}
It is not even trivial to show that $R_{n}$ is indeed
a group. We shall use the following result
(proved as a consequence of Euclid's Algorithm in
the Numbers and Sets course
for those of my audience who attend that
course) without proof.
\begin{fact} If integers
$n$ and $m$ are coprime, then there exist $a$ and $b$ integers
such that $an+bm=1$.
\end{fact}
\begin{lemma} $(R_{n},.)$ is a commutative group.
\end{lemma}
\begin{example}{\bf (Euler-Fermat Theorem.)} If $r$ and $n$ are coprime,
then
\[r^{\phi(n)}\equiv 1\mod n.\]
\end{example}
\begin{example}{\bf (Fermat's Little Theorem.)} If $p$ is a prime and
$1\leq r\leq p-1$, then
\[r^{p-1}\equiv 1\mod p.\]
\end{example}
Example~\ref{eight}, below, is not important in itself (indeed
if time presses I shall omit it) but provides useful
revision of much of our discussion of abstract finite
groups. We need a preliminary remark.
\begin{lemma} If $G$ is a group in which every element
has order 1 or 2 then $G$ is the product of cyclic groups of
order 2.
\end{lemma}
\begin{example}\label{eight} Up to isomorphism the
only groups of order 8 or less are
(i) $\{e\}$ (isomorphic to $S_{1}$ and $C_{1}$),
(ii) $C_{2}$,
(iii) $C_{3}$,
(iv) $C_{4}$, $C_{2}\times C_{2}$ (isomorphic to the symmetry
group of the rectangle),
(v) $C_{5}$,
(vi) $C_{2}\times C_{3}$ (isomorphic to
$C_{3}\times C_{2}$ and $C_{6}$), $D_{3}$ (isomorphic
to $S_{3}$),
(vii) $C_{7}$
(viii) $C_{8}$, $C_{2}\times C_{4}$ (isomorphic to $C_{4}\times C_{2}$),
$C_{2}\times C_{2}\times C_{2}$, $D_{4}$ and possibly a further
group $Q$.
\noindent All these groups are non-isomorphic unless specified
otherwise.
\end{example}
Our next task is to satisfy ourselves that the putative group
$Q$ does in fact exist.
\begin{example} Consider the $2\times 2$ matrices
\[{\mathbf i}=\begin{pmatrix}i&0\\0&-i\end{pmatrix},
\ {\mathbf j}=\begin{pmatrix}0&1\\-1&0\end{pmatrix},
\ {\mathbf k}=\begin{pmatrix}0&i\\i&0\end{pmatrix},\]
together with $I$, $-I$, $-{\mathbf i}$, $-{\mathbf j}$
and $-{\mathbf k}$. The set
\[Q=\{I,-I,{\mathbf i},-{\mathbf i},
{\mathbf j},-{\mathbf j},{\mathbf k},-{\mathbf k}\}\]
forms a subgroup $Q$ of $GL({\mathbb C}^{2})$ of order 8 with
\[{\mathbf i}^{2}={\mathbf j}^{2}={\mathbf k}^{2}=-I,
\ {\mathbf i}{\mathbf j}=-{\mathbf j}{\mathbf i}={\mathbf k},
\ {\mathbf j}{\mathbf k}=-{\mathbf k}{\mathbf j}={\mathbf i},
\ {\mathbf k}{\mathbf i}=-{\mathbf i}{\mathbf k}={\mathbf j}.\]
\end{example}
\begin{example}\label{Example, quaternions}
Continuing with the notation of the previous
example, let us consider the collection ${\mathcal Q}$ of matrices
\[{\mathbf x}=x_{0}I+x_{1}{\mathbf i}+x_{2}{\mathbf j}
+x_{3}{\mathbf k}.\]
If we add or multiply matrices in ${\mathcal Q}$ we obtain matrices
in ${\mathcal Q}$. Further, if we write
\[{\mathbf x}^{*}=x_{0}I-x_{1}{\mathbf i}-x_{2}{\mathbf j}
-x_{3}{\mathbf k},\]
then
\[{\mathbf x}{\mathbf x}^{*}={\mathbf x}^{*}{\mathbf x}=
x_{0}^{2}+x_{1}^{2}+x_{2}^{2}
+x_{3}^{2}=||{\mathbf x}||^{2}, \mbox{say},\]
and so, if ${\mathbf x}\neq{\mathbf 0}$, we may set
${\mathbf x}^{-1}=||{\mathbf x}||^{-2}{\mathbf x}^{*}$
and obtain
\[{\mathbf x}^{-1}{\mathbf x}={\mathbf x}{\mathbf x}^{-1}=I.\]
\end{example}
Thus we obtain the system ${\mathcal Q}$ of Hamilton's
quaternions which behaves much like ${\mathbb R}$ and ${\mathbb C}$
although multiplication is not commutative
(${\mathbf i}{\mathbf j}\neq {\mathbf j}{\mathbf i}$).
The quaternions form a fascinating system but further
discussion would take us too far afield.
\section{A brief look at quotient groups}
The notion of a quotient
group (like the notion of quotients in general) is extremely
useful but also fairly subtle.
If we consider a group $G$ with a subgroup $H$, then it is
natural to try and put a group structure on the
collection of left cosets $gH$. The `natural definition'
is to write $g_{1}H.g_{2}H=g_{1}g_{2}H$. BUT THIS DOES
NOT ALWAYS WORK.
\begin{theorem} Let $G$ be a group and $H$ a subgroup of $G$.
The following two statements are equivalent
(i) $g_{1}H=g'_{1}H,\ g_{2}H=g'_{2}H\ \Rightarrow
g_{1}g_{2}H=g'_{1}g'_{2}H$,
(ii) $ghg^{-1}\in H$ whenever $g\in G$, $h\in H$.
\end{theorem}
In view of this, we make the following definitions.
\begin{definition} If $H$ is a subgroup of a group $G$ we say that
$H$ is \emph{normal} if
$ghg^{-1}\in H$ whenever $g\in G$, $h\in H$.
\end{definition}
This is sometimes restated as `$H$ is normal if $gHg^{-1}\subseteq H$
for all $g\in G$' or (using the next lemma)
`$H$ is normal if the only subgroup conjugate
to $H$ is $H$ itself'.
\begin{lemma} Let $H$ be a subgroup of a group $G$. Then $H$ is normal
if and only if $gH=Hg$ for all $g\in G$.
\end{lemma}
Since left and right cosets agree for $H$ normal, we shall
refer, in this case, to `cosets' rather than left cosets.
\begin{definition} If $H$ is a normal subgroup of a group $G$,
the set $G/H$ of cosets $gH$ with multiplication defined by
$g_{1}H.g_{2}H=g_{1}g_{2}H$ is called a quotient group.
\end{definition}
\begin{theorem} Quotient groups are groups.
\end{theorem}
I reiterate the warning above:
\begin{center}
\framebox{ QUOTIENT GROUPS EXIST ONLY FOR
NORMAL SUBGROUPS.}
\end{center}
\begin{example} (i) Subgroups of Abelian groups are always normal.
(ii) If $D_{3}$ is the symmetry group of the equilateral triangle,
then no subgroup of order 2 is normal.
(iii) If $H$ is a subgroup of the finite group $G$ and $|H|=|G|/2$,
then $H$ is normal in $G$.
\end{example}
Here is a natural example of the use of quotient groups.
\begin{example}\label{quotient action}
Suppose that $G$ is a group
acting on a non-empty set $X$
and we write
\[H=\{g\in G\,:\,gx=x\ \text{for all $x\in X$}\}.\]
Then $H$ is a normal subgroup of $G$ and, if we take
\[(gH)x=gx,\]
then $G/H$ acts faithfully on $X$.
\end{example}
Quotient groups and homomorphisms are intimately linked.
\begin{definition} If $\theta:G\rightarrow H$ is a homomorphism
we define the image $\theta(G)$ of $\theta$ by
\[\theta(G)=\{\theta(g):g\in G\},\]
and the kernel $\ker(\theta)$ of $\theta$ by
\[\ker(\theta)=\theta^{-1}(e)=\{g\in G:\theta(g)=e\}.\]
\end{definition}
\begin{lemma} Let $\theta:G\rightarrow H$ be a homomorphism.
(i) $\theta(G)$ is a subgroup of $H$.
(ii) $\ker(\theta)$ is a subgroup of $G$.
(iii) $\ker(\theta)$ is a normal subgroup of $G$.
(iv) The equation $\theta(g)=h$ with $h\in H$ has a solution in $G$ if
and only if $h\in\theta(G)$.
(v) If $g_{1}\in G$ is a solution of $\theta(g)=h$ with $h\in H$,
then $g_{2}\in G$ is a solution if and only if
$g_{2}\in g_{1}\ker(\theta)$.
\end{lemma}
\begin{theorem}{\bf (The isomorphism theorem.)}\label{isomorphism theorem}
If $\theta:G\rightarrow H$ is a homomorphism,
then $G/\ker\theta$ is isomorphic to $\theta(G)$.
\end{theorem}
\begin{example} (i) Consider the additive group
$({\mathbb Z},+)$ and the cyclic group $(C_{n},.)$ generated
by $a$, say. If
$\theta:{\mathbb Z}\rightarrow C_{n}$ is given by
\[\theta(r)=a^{r},\]
then $\theta$ is a homomorphism,
\[\ker(\theta)=\{r:r\equiv 0\mod{n}\}
=n{\mathbb Z}\ \text{and}\ \theta({\mathbb Z})=C_{n}.\]
Thus ${\mathbb Z}/n{\mathbb Z}$ is isomorphic to $C_{n}$.
(ii) Consider $D_{3}$ generated by $a$ and $b$ with $a^{3}=e$,
$b^{2}=e$ and $ba=a^{-1}b$ and the cyclic group $C_{2}$ generated by
$c$ with $c^{2}=e$. If we set
\[\theta(a^{r}b^{s})=c^{s},\]
then $\theta:D_{3}\rightarrow C_{2}$ is a homomorphism,
$\theta(D_{3})=C_{2}$ and $\ker(\theta)$ is isomorphic to $C_{3}$.
(iii) Consider the cyclic group $C_{6}$
generated by $a$ with $a^{6}=e$,
and the cyclic group $C_{2}$ generated by
$c$ with $c^{2}=e$. If we set
\[\theta(a^{r})=c^{r},\]
then $\theta:C_{6}\rightarrow C_{2}$ is a homomorphism,
$\theta(C_{6})=C_{2}$ and $\ker(\theta)$ is isomorphic to $C_{3}$.
(iv) If $p$ and $q$ are distinct primes,
then the only homomorphism \mbox{$\theta:C_{p}\rightarrow C_{q}$}
is the trivial map $\theta(g)=e$ for all $g\in C_{p}$.
\end{example}
Notice that every normal subgroup can be obtained as a kernel.
\begin{example} If $H$ is a normal subgroup of a group $G$,
then the map $\theta:G\rightarrow G/H$ given by
\[\theta(g)=gH\]
is a homomorphism with kernel $H$.
\end{example}
\section{The M\"{o}bius group} At the beginning of the course
you briefly looked at the M\"{o}bius map $\tilde{T}$
\[\tilde{T}(z)=\frac{az+b}{cz+d},\]
where $ad-bc\neq 0$.
This is `almost a well defined bijective map on ${\mathbb C}$'
but, if $c\neq 0$, there are two problems. The first is
that $\tilde{T}(-d/c)$ is not defined and the second is
that there is no $w\in{\mathbb C}$ with $\tilde{T}(w)=a/c$.
We get round this by adding a new point $\infty$
(`the point at infinity' in old fashioned language)
to ${\mathbb C}$.
\begin{definition} If $a,b,c,d\in {\mathbb C}$ and $ad-bc\neq 0$
we define the M\"{o}bius map $T$ which will be written
informally as
\[T(z)=\frac{az+b}{cz+d}\]
by the following formal rules.
(i) If $c\neq 0$, then
\begin{align*}
T(z)&=\frac{az+b}{cz+d}\qquad\mbox{when}
\ z\in{\mathbb C}\setminus\{-d/c\},\\
T(-d/c)&=\infty,\\
T(\infty)&=a/c.
\end{align*}
(ii) If $c=0$, then
\begin{align*}
T(z)&=\frac{az+b}{cz+d}\qquad\mbox{when}
\ z\in{\mathbb C},\\
T(\infty)&=\infty.
\end{align*}
\end{definition}
Henceforward, when we talk about a M\"{o}bius map
we shall use this definition.
M\"{o}bius maps will reappear in geometry courses
and play an important r\^{o}le throughout complex variable theory.
The following result will be the key to our treatment of
M\"{o}bius maps.
\begin{lemma} Every M\"{o}bius map is the composition
of M\"{o}bius maps of the following three forms
$[\alpha,\lambda\in{\mathbb C},\ \lambda\neq 0]$
\begin{alignat*}{2}
S_{1}(z)&=z+\alpha&\qquad&\mbox{(Translation)},\\
S_{2}(z)&=\lambda z&\qquad&\mbox{(Rotation and dilation)},\\
S_{3}(z)&=\frac{1}{z}.&&
\end{alignat*}
\end{lemma}
\begin{theorem} The collection ${\mathcal M}$ of M\"{o}bius maps
forms a group acting on ${\mathbb C}\cup\{\infty\}$.
\end{theorem}
\begin{lemma} The general equation of a circle or straight line
in ${\mathbb C}$ is
\[Azz^{*}+Bz^{*}+B^{*}z+C=0\]
with $A$ and $C$ real and $|B|^{2}>AC$. We have a straight line
if and only if $A=0$. We have a locus through $0$
(i.e. the set defined by our equation contains $0$)
if and only if $C=0$.
\end{lemma}
\begin{theorem} The M\"{o}bius transform $T$ given by $Tz=z^{-1}$
takes circles and straight lines to circles and straight lines.
Any straight line is taken to a circle or straight line through $0$.
Any circle or straight line through $0$ is taken to a
straight line.
\end{theorem}
\begin{theorem}\label{circle} M\"{o}bius transforms take circles
and straight lines to circles and straight lines.
\end{theorem}
\begin{example}{\bf (Inversion.)} (i) If $k>0$, the map
$T:{\mathbb R}^{2}\cup\{\infty\}\rightarrow
{\mathbb R}^{2}\cup\{\infty\}$ given, in polars, by
$T(r,\theta)=(kr^{-1},\theta)$ for $r\neq 0$,
$T{\mathbf 0}=\infty$, $T{\infty}={\mathbf 0}$ takes circles
and straight lines to circles and straight lines.
(ii) If $k>0$, the map
$T:{\mathbb R}^{3}\cup\{\infty\}\rightarrow
{\mathbb R}^{3}\cup\{\infty\}$ given by
$T{\mathbf r}=kr^{-2}{\mathbf r}$ for ${\mathbf r}\neq{\mathbf 0}$,
$T{\mathbf 0}=\infty$, $T{\infty}={\mathbf 0}$ takes spheres
and planes to spheres and planes.
\end{example}
\begin{example}{\bf (Peaucellier's Inversor.)} Consider a set
of jointed rods with $OB$, $OD$ of equal length and
$AB$, $BC$, $CD$, $DA$ of equal length. If $O$ is a fixed
point but the rest of the framework is free to move in a
plane then, if $A$ describes part of a circle through $O$, $C$ will
describe part of a straight line.
\end{example}
\begin{definition} If $z_{1}$, $z_{2}$, $z_{3}$ and $z_{4}$
are distinct complex numbers, we write
\[[z_{1},z_{2},z_{3},z_{4}]=
\frac{(z_{4}-z_{1})(z_{2}-z_{3})}{(z_{4}-z_{3})(z_{2}-z_{1})}\]
and call $[z_{1},z_{2},z_{3},z_{4}]$ the cross ratio of
$z_{1}$, $z_{2}$, $z_{3}$ and $z_{4}$.
\end{definition}
As might be expected, different authors permute the
suffices in different ways so you should always state
which definition you use\footnote{The particular choice
we made has the property that,
if we write $Tw=[z_{1},z_{2},z_{3},w]$,
then $Tz_{1}=0$, $Tz_{2}=1$ and $Tz_{3}=\infty$.}.
(All the theorems remain unaltered
whichever definition is used.)
\begin{theorem} (i) Cross ratio is unaltered by M\"{o}bius transformation.
Thus, if $z_{1}$, $z_{2}$, $z_{3}$ and $z_{4}$
are distinct complex numbers and $T$ is a M\"{o}bius transform,
\[[Tz_{1},Tz_{2},Tz_{3},Tz_{4}]=[z_{1},z_{2},z_{3},z_{4}].\]
(ii) If $T$ is a M{\"o}bius map with $T0=0$, $T1=1$
and $T\infty=\infty$, then $T=I$.
(iii) If $z_{1}$, $z_{2}$ and $z_{3}$ are distinct complex numbers,
then there exists a unique M\"{o}bius transform $T$
such that
\[Tz_{1}=0,\ Tz_{2}=1,\ Tz_{3}=\infty.\]
(iv) If $z_{1}$, $z_{2}$ and $z_{3}$ are distinct complex numbers
and $w_{1}$, $w_{2}$ and $w_{3}$ are distinct complex numbers,
then there exists a unique M\"{o}bius transform $T$
such that
\[Tz_{1}=w_{1},\ Tz_{2}=w_{2},\ Tz_{3}=w_{3}.\]
\end{theorem}
\begin{example}\label{Circle} The distinct complex numbers
$z_{1}$, $z_{2}$, $z_{3}$ and $z_{4}$ lie on a circle
(or straight line) if and only if their cross ratio
$[z_{1},z_{2},z_{3},z_{4}]$ is real.
\end{example}
Note that this gives us an alternative proof of
Theorem~\ref{circle}\footnote{To avoid circularity
we have to give an alternative proof of Example~\ref{Circle}.
One way of obtaining such a proof
is to use the fact that (provided we choose
their sign appropriately) angles on the same chord
of a circle are equal modulo $\pi$.}.
It is interesting to apply some of our general ideas on groups
to the specific group ${\mathcal M}$.
\begin{lemma} The collection $GL({\mathbb C}^{n})$ of
invertible $n\times n$ complex matrices forms a group
under matrix multiplication. The set
\[SL({\mathbb C}^{n})=\{A\in GL({\mathbb C}^{n}):
\det A=1\}\]
is a subgroup.
\end{lemma}
\begin{lemma}\label{special}
(i) The map $\theta:GL({\mathbb C}^{2})\rightarrow{\mathcal M}$
given by
\[\theta \begin{pmatrix}a&b\\c&d\end{pmatrix}(z)=\frac{az+b}{cz+d}\]
is a surjective homomorphism with kernel the subgroup
\[\{\lambda I:\lambda\in{\mathbb C},\ \lambda\neq 0\}.\]
(ii) The map $\theta:SL({\mathbb C}^{2})\rightarrow{\mathcal M}$
given by
\[\theta \begin{pmatrix}a&b\\c&d\end{pmatrix}(z)=\frac{az+b}{cz+d}\]
is a surjective homomorphism with kernel the subgroup
$\{I,-I\}$.
\end{lemma}
We note that a simple modification of Theorem~\ref{canonical1}
gives the following lemma.
\begin{lemma}
If $A\in SL({\mathbb C}^{2})$,
then exactly one of the following three things must happen.
(i) We can find
$P\in SL({\mathbb C}^{2})$ such that
\[P^{-1}AP=\begin{pmatrix}\lambda&0\\0&\lambda^{-1}\end{pmatrix}\]
for some $\lambda\in{\mathbb C}$ with $\lambda\neq 1,-1,0$.
(ii) $A=\pm I$.
(iii) We can find
$P\in SL({\mathbb C}^{2})$ such that
\[P^{-1}AP=\begin{pmatrix}\lambda&1\\0&\lambda\end{pmatrix}\]
with $\lambda=\pm 1$.
\end{lemma}
Using Lemma~\ref{special}~(ii) this gives us the following result
on M\"{o}bius maps.
\begin{lemma}\label{Fix}
If $T\in{\mathcal M}$, then one of the following
three things must happen.
(i) $T$ is conjugate to a map $S$ of the form $Sz=\mu z$
(i.e., a rotation and dilation) with $\mu\neq 1$.
(ii) $T=\iota$ (i.e., $Tz=z$ for all $z$).
(iii) $T$ is conjugate to the map $S$ given by $Sz=z\pm 1$
(a translation).
\end{lemma}
Note that we separate (i) and (ii) because $T$ behaves very
differently in the two cases. In Lemma~\ref{Fix more}
we see that the two maps $Sz=z\pm 1$ are, in fact, conjugate.
We say that a map $F:X\rightarrow X$ has $x\in X$ as a fixed point
if $F(x)=x$.
\begin{lemma} If $T\in{\mathcal M}$, then $T=\iota$ or $T$
has one or two fixed points.
\end{lemma}
If we are interested in the behaviour of $T^{n}z$, then
(by conjugating with the M\"{o}bius map $S$ given by $Sz=1/z$
in (i) and by a similar trick in (iii))
we can obtain a slightly more refined version of Lemma~\ref{Fix}.
\begin{lemma}\label{Fix more}
If $T\in{\mathcal M}$ then one of the following
four things must happen.
(ia) $T$ is conjugate to a map $S$ of the form $Sz=\mu z$
with $|\mu|>1$.
(ib) $T$ is conjugate to a map $S$ of the form $Sz=\mu z$
with $|\mu|=1$ (pure rotation) with $\mu\neq 1$.
(ii) $T=\iota$ (i.e., $Tz=z$ for all $z$).
(iii) $T$ is conjugate to the map $S$ given by $Sz=z+1$
(a translation).
\end{lemma}
Thus, given a $T\in{\mathcal M}$, we can find $Q\in{\mathcal M}$
such that $S=QTQ^{-1}$ takes one of the simple forms
above. Since
\[T^{n}z=Q^{-1}(S^{n}(Q(z))),\]
study of $S^{n}w$ with $w=Q(z)$ tells us about $T^{n}z$.
Exercise~\ref{Fix more direct} outlines a direct proof
of Lemma~\ref{Fix more} which does not depend on
looking at $SL({\mathbb C}^{2})$.
\section{Permutation groups} We now return to the study of
the finite permutation group
\[S_{n}=S(\{1,2,\dots,n\}).\]
\begin{definition} If $\sigma\in S_{n}$ has the property
that there exist distinct $a_{1},a_{2},\dots,a_{m}$ such that
\begin{alignat*}{2}
\sigma(a_{j})&=a_{j+1}&\qquad&[1\leq j\leq m-1],\\
\sigma(a_{m})&=a_{1},&\\
\sigma(x)&=x&\qquad&\mbox{if}\ x\notin\{a_{1},a_{2},\dots,a_{m}\},
\end{alignat*}
we say that $\sigma$ is a cycle of length $m$ and write
\[\sigma=(a_{1}a_{2}\dots a_{m}).\]
If $\{a_{1},a_{2},\dots,a_{m}\}\cap\{b_{1},b_{2},\dots,b_{p}\}
=\emptyset$ we say that the cycles $(a_{1}a_{2}\dots a_{m})$
and $(b_{1}b_{2}\dots b_{p})$ are disjoint.
\end{definition}
\begin{lemma}\label{not-unique}
(i) $(a_{1}a_{2}\dots a_{m})=(a_{m}a_{1}\dots a_{m-1})
=(a_{m-1}a_{m}\dots a_{m-2})=\dots$ (i.e., cycles can be
cycled).
(ii) If $\sigma$ and $\tau$ are disjoint cycles then
$\sigma\tau=\tau\sigma$ (i.e., disjoint cycles commute).
(iii) $(a)=\iota$ for all $a\in\{1,2,\dots,n\}$.
\end{lemma}
\begin{theorem} Every $\sigma\in S_{n}$ can be written as the product
of disjoint cycles. The representation is unique subject
to the variations allowed by Lemma~\ref{not-unique}.
\end{theorem}
\begin{example} (i) If $\sigma$ is the product of disjoint
cycles of length $n_{1}$, $n_{2}$, \dots, $n_{k}$, then
$\sigma$ has order $\operatorname{lcm}(n_{1},n_{2},\dots,n_{k})$.
(ii) If a pack of $N$ cards is repeatedly shuffled
using exactly the same shuffle, then the pack will return to its
initial state after at most
\[\max\{\operatorname{lcm}
(n_{1},n_{2},\dots,n_{k}):n_{1}+n_{2}+\dots+n_{k}=N\}\]
shuffles.
\end{example}
The following results are very useful in later work (but experience
shows that by the time the results are needed they will have been
forgotten and need to be painfully relearned).
\begin{lemma}\label{L;cycle type}
(i) If $\sigma\in S_{n}$, then
\[\sigma(a_{1}a_{2}\dots a_{m})\sigma^{-1}
=(\sigma(a_{1}) \sigma(a_{2})\dots\sigma(a_{m})).\]
(ii) If the $(a_{1j}a_{2j}\dots a_{m(j)j})$ are disjoint cycles
and $\sigma\in S_{n}$, then
\[\sigma\left(\prod_{j=1}^{s}(a_{1j}a_{2j}\dots a_{m(j)j})
\right)\sigma^{-1}
=\prod_{j=1}^{s}(\sigma(a_{1j}) \sigma(a_{2j})\dots
\sigma(a_{m(j)j})).\]
(iii) Two elements of $S_{n}$ are conjugate if and only if they
are the product of the same number of disjoint cycles of each length
(have the same `cycle type').
\end{lemma}
When thinking about results like~(iii) we must be careful to
treat cycles of length $1$ consistently.
The next lemma is obvious. We refer to cycles of length 2
as transpositions.
\begin{lemma}\label{L, product transpositions}
Every $\sigma\in S_{n}$ is the product of transpositions.
\end{lemma}
What is much less obvious is the next result.
\begin{lemma} The product of an even number of transpositions
cannot be written as the product of an odd number of transpositions
and vice versa.
\end{lemma}
We obtain this result as the consequence of an equivalent statement.
\begin{theorem}{\bf (Existence of Signature.)}\label{signature}
(i) If $n\geq2$ there exists a
non-trivial homomorphism $\zeta$ from $S_{n}$ to the
multiplicative group $(\{-1,1\},\times)$.
(ii) There is only one non-trivial homomorphism $\zeta$ from $S_{n}$ to the
multiplicative group $(\{-1,1\},\times)$ $[n\geq 2]$. If $\sigma$ is
the product of $m$ transpositions,
then $\zeta(\sigma)=(-1)^{m}$.
\end{theorem}
(Exercise~\ref{Vandermonde} sheds some
light on our proof of Theorem~\ref{signature}.)
We call the $\zeta$ of Theorem~\ref{signature} the signature.
It will be used in the treatment of the determinant in
the linear algebra course and in the treatment of alternating forms in
later algebra. The signature also appears (via the alternating group
$A_{n}$ defined below in Definition~\ref{Alternate}) in the discussion
of the symmetry groups of regular polyhedra in later geometry courses
and in Galois Theory.
The following example, which is emphatically outside the schedules,
shows that the existence of a signature is not obvious.
\begin{example} Suppose $\theta:S({\mathbb Z})\rightarrow\{-1,1\}$
is a homomorphism. Extending our notation from the finite
case, suppose that
\[\sigma=(12)(34)(56)\dots\]
and that $\tau$ is the shift map given by $\tau(j)=j+1$ for
all $j\in{\mathbb Z}$.
(i) $\tau^{2}\sigma\tau^{-2}=(34)(56)(78)\dots$.
(ii) $\sigma\tau^{2}\sigma\tau^{-2}=(12)$.
(iii) $\theta((12))=1$.
(iv) If $\mu$ is the product of a finite number of cycles of
finite length then $\theta(\mu)=1$.
\end{example}
Computation of signatures is made easy by the following observation.
\begin{lemma} (i)
$(a_{1}a_{m+1})(a_{1}a_{2}\dots a_{m})
=(a_{1}a_{2}\dots a_{m}a_{m+1})$.
(ii) A cycle of length $k$ has signature $(-1)^{k+1}$.
\end{lemma}
\begin{definition}\label{Alternate}
If $n\geq 2$ and $\zeta:S_{n}\rightarrow\{-1,1\}$
is the signature, we write $A_{n}=\ker(\zeta)$ and call $A_{n}$
the alternating group.
\end{definition}
\begin{lemma} (i) $S_{n}$ has order $n!$.
(ii) $A_{n}$ is a normal subgroup of $S_{n}$. $S_{n}/A_{n}$ is isomorphic
to $C_{2}$.
(iii) $A_{n}$ has order $n!/2$.
\end{lemma}
\begin{exercise} Use the orbit-stabiliser theorem
to prove that $S_{n}$ has order $n!$ and $A_{n}$ has order $n!/2$.
\end{exercise}
The following result is interesting in itself and will give
us some practice in working with $A_{n}$.
\begin{example}\label{Aunty}
$A_{4}$ has order 12 but has no subgroup of order 6.
\end{example}
\section{Trailers} (The contents of this section are not
part of the course and I will not lecture on them unless there is
time at the end.)
One of the main topics in my treatment of this course
was the subject of distance
preserving linear maps, that is linear maps
$\alpha:{\mathbb R}^{n}\rightarrow{\mathbb R}^{n}$ such that
if $\alpha{\mathbf x}={\mathbf x}^{\prime}$ then
\[x_{1}^{2}+x_{2}^{2}+\dots+x_{n}^{2}=
x_{1}^{\prime 2}+x_{2}^{\prime 2}+\dots+x_{n}^{\prime 2}.\]
In Einstein's Special Theory of Relativity, which is the subject
of a course in the third term,
particular interest is attached to those linear
maps on ${\mathbb R}^{3}\times{\mathbb R}$ (that is `space-time')
which leave
\[x^{2}+y^{2}+z^{2}-(ct)^{2}\]
unchanged. Normalising and generalising, this suggests that we
should study the linear maps
$\alpha:{\mathbb R}^{n}\rightarrow{\mathbb R}^{n}$ such that
if $\alpha{\mathbf x}={\mathbf x}^{\prime}$, then
\begin{align*}
x_{1}^{2}+x_{2}^{2}&+\dots+x_{m}^{2}-x_{m+1}^{2}-x_{m+2}^{2}
-\dots-x_{n}^{2}\\
&=x_{1}^{\prime 2}+x_{2}^{\prime 2}+\dots+x_{m}^{\prime 2}
-x_{m+1}^{\prime 2}-x_{m+2}^{\prime 2}-\dots-x_{n}^{\prime 2}.
\end{align*}
This is too much of a challenge for the moment (it will be easier
after next year's linear algebra course) so we study the simplest case.
\begin{example} The collection ${\cal L}$ of linear maps
$\alpha:{\mathbb R}^{2}\rightarrow{\mathbb R}^{2}$
such that, if $\alpha{\mathbf x}={\mathbf x}^{\prime}$,
then
\[x_{1}^{2}-x_{2}^{2}=x_{1}^{\prime 2}-x_{2}^{\prime 2}\]
forms a group ${\mathcal L}$. If we write ${\mathcal L}_{0}$
for the set of
linear maps $\alpha:{\mathbb R}^{2}\rightarrow{\mathbb R}^{2}$
which have matrix
\[\begin{pmatrix}\cosh t&\sinh t\\\sinh t&\cosh t\end{pmatrix},\]
with respect to the standard basis then:
(i) ${\mathcal L}_{0}$ is a subgroup of ${\mathcal L}$.
(ii) ${\mathcal L}$ is the union of the four disjoint
cosets $E_{j}{\mathcal L}_{0}$ with
\[E_{1}=I,\ E_{2}=-I,\ E_{3}=
\begin{pmatrix}1&0\\0&-1\end{pmatrix},
\ E_{4}=\begin{pmatrix}-1&0\\0&1\end{pmatrix}.\]
(iii) ${\mathcal L}_{0}$ is normal.
(iv) ${\mathcal L}_{0}$ is isomorphic to $({\mathbb R},+)$.
(v) $\{E_{j}:1\leq j\leq 4\}$ is a subgroup of ${\mathcal L}$
isomorphic to $C_{2}\times C_{2}$ but ${\mathcal L}$
is not commutative and so, in particular, not isomorphic
to $C_{2}\times C_{2}\times {\mathbb R}$.
\end{example}
Groups like ${\mathcal L}$ are called Lorentz groups after the
great Dutch physicist who first formulated the transformation
rules which underlie the Special Theory of Relativity.
Next year's linear algebra course
contains generalisations of the notions of
orthogonal and symmetric matrices and maps from the real to the complex
case. When people become algebraists they have to swear
a terrible oath never to reveal that their subject has
applications to other parts of mathematics and
the linear algebra course has been
designed with this in mind. However the generalisations
are used in classical and, particularly, in modern
physics.
We start by defining an inner product on ${\mathbb C}^{n}$
by
\[\langle{\mathbf z},{\mathbf w}\rangle=\sum_{r=1}^{n}z_{r}w_{r}^{*}.\]
\begin{lemma} If ${\mathbf z},{\mathbf w},{\mathbf u}
\in{\mathbb C}^{n}$ and $\lambda\in{\mathbb C}$, then
(i) $\langle{\mathbf z},{\mathbf z}\rangle$ is always real
and non-negative.
(ii) $\langle{\mathbf z},{\mathbf z}\rangle=0$ if and only if
${\mathbf z}={\mathbf 0}$.
(iii) $\langle\lambda{\mathbf z},{\mathbf w}\rangle=
\lambda\langle{\mathbf z},{\mathbf w}\rangle$.
(iv) $\langle{\mathbf z}+{\mathbf u},{\mathbf w}\rangle=
\langle{\mathbf z},{\mathbf w}\rangle
+\langle{\mathbf u},{\mathbf w}\rangle$.
(v) $\langle{\mathbf w},{\mathbf z}\rangle=
\langle{\mathbf z},{\mathbf w}\rangle^{*}$.
\end{lemma}
Rule (v) is a warning that we must tread carefully with
our new complex inner product and not expect it to behave
quite as simply as the old real inner product. However, it
turns out that
\[||{\mathbf z}||=\langle{\mathbf z},{\mathbf z}\rangle^{1/2}\]
behaves just as we wish it to behave. (This is not really
surprising, if we write $z_{r}=x_{r}+iy_{r}$ with $x_{r}$
and $y_{r}$ real, we get
\[||{\mathbf z}||^{2}=\sum_{r=1}^{n}x_{r}^{2}+\sum_{r=1}^{n}y_{r}^{2}\]
which is clearly well behaved.)
We can take over Definition~\ref{S1} and Lemma~\ref{S2}
directly.
\begin{definition} (i) We say that ${\mathbf a}$ and
${\mathbf b}$ are orthogonal if
$\langle{\mathbf a},{\mathbf b}\rangle=0$.
(ii) We say that ${\mathbf e}_{1}$, ${\mathbf e}_{2}$,
\dots, ${\mathbf e}_{n}$ are orthonormal if
\[\langle{\mathbf e}_{i},{\mathbf e}_{j}\rangle=\delta_{ij}\]
for all $1\leq i,j\leq n$.
\end{definition}
\begin{lemma} Any system of $n$ orthonormal vectors
${\mathbf e}_{1}$, ${\mathbf e}_{2}$,
\dots, ${\mathbf e}_{n}$ forms a basis for ${\mathbb C}^{n}$.
(We call this an orthonormal basis.) If
${\mathbf x}\in{\mathbb C}^{n}$, then
\[{\mathbf x}=\sum_{r=1}^{n}\langle{\mathbf x},{\mathbf e}_{r}\rangle
{\mathbf e}_{r}.\]
\end{lemma}
If we now ask which linear maps preserve our new distance,
we get
results and definitions which parallel the sequence from Lemma~\ref{01}
to Lemma~\ref{04}.
\begin{lemma} If ${\mathbf a}$, ${\mathbf b}\in{\mathbb C}^{n}$,
then
\[||{\mathbf a}+{\mathbf b}||^{2}
-||{\mathbf a}-{\mathbf b}||^{2}
+i||{\mathbf a}+i{\mathbf b}||^{2}
-i||{\mathbf a}-i{\mathbf b}||^{2}=
4\langle{\mathbf a},{\mathbf b}\rangle.\]
\end{lemma}
\begin{definition} If $A$ is the $n\times n$
complex matrix $(a_{ij})$,
then $A^{*}$ (the adjoint of $A$) is the
$n\times n$ matrix $(b_{ij})$ with $b_{ij}=a_{ji}^{*}$
$[1\leq i,j\leq n]$.
\end{definition}
\begin{theorem} Let $\alpha:{\mathbb C}^{n}\rightarrow{\mathbb C}^{n}$
be linear.
The following statements are equivalent.
(i) $||\alpha {\mathbf x}||=||{\mathbf x}||$ for all
${\mathbf x}\in{\mathbb C}^{n}$.
(ii) $\langle\alpha{\mathbf x},\alpha{\mathbf y}\rangle
=\langle{\mathbf x},{\mathbf y}\rangle$ for all
${\mathbf x},{\mathbf y}\in{\mathbb C}^{n}$.
(iii) If $\alpha$ has matrix $A$ with respect to some
orthonormal basis, then \mbox{$AA^{*}=I$}.
\end{theorem}
A complex $n\times n$ matrix $A$ with $AA^{*}=I$ is called
a unitary matrix.
If we think of the linear maps
as central, we refer to the collection of norm-preserving
linear maps by the name $U(n)$. If we think of the
matrices as central, we refer to the collection of complex
$n\times n$ matrices $A$ with $AA^{*}=I$ by the name $U(n)$.
\begin{lemma} If $A$ is a unitary matrix then
$|\det A|=1$.
\end{lemma}
If we think in terms of linear maps, we define
\[SU(n)=\{\alpha\in U(n):\det\alpha=1\}.\]
If we think in terms of matrices, we define
\[SU(n)=\{A\in U(n):\det A=1\}.\]
(The letter $U$ stands for `unitary', the letters
$SU$ for `special unitary'.)
\begin{lemma} $U(n)$ is a group and $SU(n)$ a subgroup of $U(n)$.
\end{lemma}
Not surprisingly, the generalisation of the symmetric matrix
from the real case turns out to involve $^{*}$ rather than $^{T}$.
The reader should have no difficulty in proving the
following parallel to Lemma~\ref{K1}.
\begin{lemma}
Let $\alpha:{\mathbb C}^{n}\rightarrow{\mathbb C}^{n}$
be linear.
The following statements are equivalent.
(i) $\langle\alpha{\mathbf x},{\mathbf y}\rangle
=\langle{\mathbf x},\alpha{\mathbf y}\rangle$ for all
${\mathbf x},{\mathbf y}\in{\mathbb C}^{n}$.
(ii) If $\alpha$ has matrix $A$ with respect to some
orthonormal basis, then $A=A^{*}$.
\end{lemma}
We call an $\alpha$, having the properties just described,
Hermitian. We can also call $\alpha$ a `self-adjoint' map.
Again, the reader should have no problems proving
the following versions of Theorems~\ref{Real} and~\ref{Perpendicular}.
\begin{theorem}
If $\alpha:{\mathbb C}^{n}\rightarrow{\mathbb C}^{n}$
is Hermitian, then all the roots of the characteristic polynomial
$\det(t\iota-\alpha)$ are real.
\end{theorem}
\begin{theorem}
If $\alpha:{\mathbb C}^{n}\rightarrow{\mathbb C}^{n}$
is Hermitian, then eigenvectors corresponding to distinct
eigenvalues are orthogonal.
\end{theorem}
As we might expect, the linear algebra course will contain a proof of the
following result (compare Fact~\ref{Hermite}).
\begin{fact}\label{happy} (i) The map
$\alpha:{\mathbb C}^{n}\rightarrow{\mathbb C}^{n}$
is Hermitian if and only if there exists an orthonormal
basis of eigenvectors of ${\mathbb C}^{n}$ with respect
to which $\alpha$ has a diagonal matrix with real entries.
(ii) The $n\times n$ complex matrix $A$ is Hermitian
if and only if there exists a
matrix $P\in SU(n)$ such that $P^{*}AP$ is diagonal
with real entries.
\end{fact}
In order to keep things simple for the rest of the discussion,
we shall confine ourselves
to the two dimensional space ${\mathbb C}^{2}$,
but almost everything carries over to higher dimensions.
\begin{lemma} (i) If $\alpha\in U(2)$, then we can find an
orthonormal basis for ${\mathbb C}^{2}$ with respect to
which $\alpha$ has matrix
\[\begin{pmatrix}e^{i\theta}&0\\0&e^{i\phi}\end{pmatrix}\]
with $\theta,\phi\in{\mathbb R}$.
Conversely, any $\alpha$ with such a matrix
representation is in $U(2)$.
(ii) If $\alpha\in SU(2)$, then we can find an
orthonormal basis for ${\mathbb C}^{2}$ with respect to
which $\alpha$ has matrix
\[\begin{pmatrix}e^{i\theta}&0\\0&e^{-i\theta}\end{pmatrix}\]
with $\theta\in{\mathbb R}$.
Conversely, any $\alpha$ with such a matrix
representation is in $SU(2)$.
(iii) If $\beta:{\mathbb C}^{2}\rightarrow{\mathbb C}^{2}$
is Hermitian, then we can find an
orthonormal basis for ${\mathbb C}^{2}$ with respect to
which $\beta$ has matrix
\[\begin{pmatrix}\lambda&0\\0&\mu\end{pmatrix}\]
with $\lambda,\mu\in{\mathbb R}$.
Conversely, any $\beta$ with such a matrix
representation is Hermitian.
\end{lemma}
(Note that we can prove (iii) directly without appealing to
Fact~\ref{happy}.)
The discussion now takes off into the wild blue yonder.
Readers who are already confused should stop reading here
(indeed they could throw away the whole of this last section
without real loss). Those who read on can treat
what follows as a formal exercise (though, rather surprisingly,
it is actually rigorous).
\begin{lemma} If
$\alpha:{\mathbb C}^{2}\rightarrow{\mathbb C}^{2}$
is linear, then
(i) We can find a $K$ such that
\[||\alpha{\mathbf x}||\leq K||{\mathbf x}||\]
for all ${\mathbf x}\in{\mathbb C}^{2}$.
(ii) With the notation of (i),
\[||\alpha^{n}{\mathbf x}||\leq K^{n}||{\mathbf x}||.\]
(iii) If ${\mathbf x}\in {\mathbb C}^{2}$, then
$\sum_{n=0}^{\infty}\alpha^{n}{\mathbf x}/n!$ converges
to a limit which we call
$\exp(\alpha){\mathbf x}$. More formally,
we can find $\exp(\alpha){\mathbf x}\in {\mathbb C}^{2}$
such that
\[\left|\left|\sum_{n=0}^{N}\frac{1}{n!}\alpha^{n}{\mathbf x}
-\exp(\alpha){\mathbf x}\right|\right|\rightarrow 0\]
as $N\rightarrow\infty$.
(iv) With the notation of (iii),
$\exp(\alpha):{\mathbb C}^{2}\rightarrow{\mathbb C}^{2}$
is a linear map.
(v) If $\alpha$ has matrix $A$ with respect to a given basis, then,
with respect to the same basis, $\exp(\alpha)$ has matrix
\[\exp(A)=\sum_{n=0}^{\infty}\frac{1}{n!}A^{n}\]
with the sum taken in the obvious (component by component) way.
\end{lemma}
\noindent IMPORTANT WARNING In general
$\exp(\alpha)\exp(\beta)\neq\exp(\alpha+\beta)$.
Before stating our final result we need a definition and
an accompanying remark.
\begin{definition} If $\alpha:{\mathbb F}^{n}\rightarrow{\mathbb F}^{n}$
is linear, then the trace $\tr(\alpha)$ is defined to be
minus the coefficient
of $t^{n-1}$ in the characteristic polynomial $\det(t\iota-\alpha)$.
\end{definition}
\begin{lemma} If the linear map
$\alpha:{\mathbb F}^{n}\rightarrow{\mathbb F}^{n}$
has matrix $A$ with respect to some basis then
\[\tr(\alpha)=\sum_{j=1}^{n}a_{jj}.\]
\end{lemma}
We call $\sum_{j=1}^{n}a_{jj}$ the trace of $A$ and write it $\tr A$.
The trace will be discussed again in the linear algebra course.
\begin{lemma} (i) The linear map $\alpha$ lies in $SU(2)$ if and only if
$\alpha=e^{i\beta}$
with $\beta$ Hermitian of trace 0.
(ii) The $2\times 2$ matrix $A$ is in $SU(2)$ (considered as a matrix
group) if and only if
\[A=\exp(i(a_{1} S_{1}+a_{2}S_{2}+a_{3}S_{3}))\]
with $a_{1}$, $a_{2}$, $a_{3}$ real and
\[S_{1}=\begin{pmatrix}0&1\\1&0\end{pmatrix},
\ S_{2}=\begin{pmatrix}0&-i\\i&0\end{pmatrix},
\ S_{3}=\begin{pmatrix}1&0\\0&-1\end{pmatrix}.\]
\end{lemma}
The matrices $S_{1}$, $S_{2}$ and $S_{3}$ are called the Pauli spin
matrices. They will turn up together with the group $SU(2)$
in the Part II Quantum Mechanics courses. Had we gone through
the argument above with $SU(3)$ in place of $SU(2)$, we would
have obtained eight real parameters $a_{1}$, $a_{2}$,
\dots, $a_{8}$ in place of the three just found, and so
heard a distant echo of the famous eight-fold way
of Gell-Mann and Ne'eman.
\section{Books} Professor Beardon has just brought out a book
\emph{Algebra and Geometry}
which is very close in spirit and content to this course.
If you want just one algebra text to see
you through Parts~1A and~1B then C.~W.~Norman \emph{Undergraduate
Algebra} is very user friendly. P.~M.~Cohn's
\emph{Algebra} (Volume~1) is a more highbrow text
but will appeal to future algebraists.
All three books should be in your
college library. (If not the librarian will be pleased
to order them.) In general you should first consult
textbooks in your library and only buy one when you have
used it successfully for some time.
\section{First exercise set}
Most of the exercises in these exercise sets
are taken from earlier sheets of Professor Beardon. In each case,
the first five or so exercises are intended to be short
and any exercises after the first twelve are for enthusiasts.
(The extra questions may come in handy for revision,
or your supervisor may choose a different selection
of questions or one of the extra questions such as
Exercise~\ref{Euler onwards} or~\ref{Vandermonde}
may catch your fancy.)
We will take ${\mathbf e}_{1}$, ${\mathbf e}_{2}$
\dots, ${\mathbf e}_{n}$ to be the standard basis of
${\mathbb R}^{n}$.
Unless otherwise stated,
matrices act on ${\mathbb R}^{n}$ with respect to this basis.
\begin{question}\label{C1.1} (a) Find a $3\times 3$ real matrix
with eigenvalues $1,\,i,\,-i$. [Think geometrically.]
(b) Construct a $3\times 3$ non-zero real matrix
which has all three eigenvalues zero.
\end{question}
\begin{question}\label{C1.2}
(a) Let $A$ be a square matrix such that $A^m=0$
for some integer $m$.
Show that every eigenvalue of $A$ is zero.
(b) Let $A$ be a real $2\times 2$ matrix which has
non-zero non-real eigenvalues.
Show that the non-diagonal elements of $A$ are non-zero,
but that the diagonal elements may be zero.
\end{question}
\begin{question}\label{C1.3}
Suppose that $A$ is an $n\times n$ square matrix
and that $A^{-1}$ exists. Show that if $A$ has
characteristic equation
$a_{0} + a_{1}t + \dots + a_{n}t^{n} = 0$, then $A^{-1}$ has
characteristic equation
\[(-1)^{n} \det (A^{-1})(a_{n} + a_{n-1}t + \dots + a_{0}t^{n}) = 0.\]
[{\bf Note} : take $n=3$ in this question if you wish,
but treat the general case if you can.
It should be clear that $\lambda$ is an eigenvalue of $A$
if and only if $1/\lambda$ is an eigenvalue of $A^{-1}$,
but this result says more than this
(about multiplicities of eigenvalues).
You should use properties of the determinant to solve this
problem, for example, $\det (A)\det (B) = \det (AB)$.
You should also state explicitly why we do not need to
worry about zero eigenvalues.]
\end{question}
\begin{question}\label{C1.4}
Show that the matrix
\[A =
\left(
\begin{matrix}0&1&0\\-4&4&0\\-2&1&2\end{matrix}
\right)
\]
has characteristic equation $(t-2)^3 = 0$.
Explain (without doing any further calculations) why $A$
is not diagonalisable.
\end{question}
\begin{question}\label{C1.5} (i)
Find $a$, $b$ and $c$ such that the matrix
\[
\left(
\begin{matrix}1/3&0&a\\2/3&1/{\sqrt 2}&b\\
2/3&-1/{\sqrt 2}&c
\end{matrix}
\right)
\]
is orthogonal. Does this condition
determine $a$, $b$ and $c$ uniquely?
(ii) (Exercise~\ref{E;non symmetric converse}) Let
\[A=\left(\begin{matrix}2&0\\0&1\end{matrix}\right)
\ \text{and}
\ P=\left(\begin{matrix}1&0\\1&1\end{matrix}\right).\]
Compute $PAP^{-1}$ and observe that it is not a symmetric
matrix, although $A$ is. Why does this not contradict
Lemma~\ref{L;symmmetric converse}?
\end{question}
\begin{question}\label{C1.6}
Let
\[A=
\left(
\begin{matrix} 3&4\\-1&-1
\end{matrix}
\right).
\]
Find the characteristic
equation for $A$.
Verify\footnote{See Example~\ref{Cayley--Hamilton}.}
that $A^{2} = 2A-I$. Is $A$ diagonalisable ?
Show by induction that $A^{n}$ lies in the
two-dimensional subspace (of the space of
$2\times 2$ real matrices) spanned by $A$ and $I$,
so that there exist real numbers $\alpha_{n}$ and $\beta_{n}$ with
\[A^{n} = \alpha_{n}A + \beta_{n}I.\]
Use the fact that $A^{n+1} = AA^{n}$ to find a
recurrence relation (i.e., a difference equation)
for $\alpha_{n}$ and $\beta_{n}$. Solve these
and hence find an explicit formula for $A^{n}$.
Verify this formula by induction.
\end{question}
\begin{question}\label{C1.7}
For each of the three matrices below,
(a) compute their eigenvalues
(as often happens in exercises and seldom in real life
each eigenvalue is a small integer);
(b) for each real eigenvalue $\lambda$
compute the dimension of the eigenspace
$\{{\mathbf x} \in {\mathbb R}^{3}\,:\, A{\mathbf x} =
\lambda {\mathbf x}\}$;
(c) determine whether or not the matrix
is diagonalisable as a map of ${\mathbb R}^{3}$ into itself.
\[
\left(
\begin{matrix}
5&-3&2\\6&-4&4\\4&-4&5
\end{matrix}
\right),
\ \left(
\begin{matrix}
1&-3&4\\4&-7&8\\6&-7&7
\end{matrix}
\right),
\ \left(
\begin{matrix}
7&-12&6\\10&-19&10\\12&-24&13
\end{matrix}
\right).
\]
\end{question}
\begin{question}\label{C1.8}
Determine the eigenvalues and eigenvectors of the symmetric matrix
\[A =
\left(
\begin{matrix}
3&1&1\\1&2&0\\1&0&2
\end{matrix}
\right).\]
Use an identity of the form $PAP^{T}=D$,
where $D$ is a diagonal matrix, to find $A^{-1}$.
\end{question}
\begin{question}\label{C1.9}
The object of this exercise is to show why finding eigenvalues
of a large matrix is not just a matter of finding a large
fast computer.
Consider the $n\times n$ complex matrix $A=(a_{ij})$ given by
\begin{align*}
a_{j\,j+1}&=1&&\qquad\text{for $1\leq j\leq n-1$}\\
a_{n1}&=\kappa^{n}\\
a_{ij}&=0&&\qquad\text{otherwise,}
\end{align*}
where $\kappa\in{\mathbb C}$ is non-zero. Thus, when
$n=2$ and $n=3$, we get the matrices
\[
\left(
\begin{matrix}
0&1\\
\kappa^{2}&0
\end{matrix}
\right)
\ \text{and}
\ \left(
\begin{matrix}
0&1&0\\
0&0&1\\
\kappa^{3}&0&0
\end{matrix}
\right).
\]
(i) Find the eigenvalues and associated eigenvectors of $A$
for $n=2$ and $n=3$. (Note that we are working over ${\mathbb C}$
so we must consider complex roots.)
(ii) By guessing and then verifying your answers, or otherwise,
find the eigenvalues and associated eigenvectors of $A$
for all $n\geq 2$.
(iii) Suppose that your computer works to $15$ decimal places
and that $n=100$. You decide to find the eigenvalues of $A$
in the cases $\kappa=2^{-1}$ and $\kappa=3^{-1}$. Explain
why at least one (and more probably both) attempts will
deliver answers which bear no relation to the true answers.
\end{question}
\begin{question}\label{C1.10}
(a) Let
\[A =
\left(
\begin{matrix}
2&-2\\-2&5
\end{matrix}
\right).
\]
Find an orthogonal matrix $P$ such that
$P^{T}AP$ is diagonal and use $P$ to
diagonalise the quadratic form
\[Q(x,y) = 2x^2 -4xy + 5y^2.\]
(b) Diagonalise the quadratic form
\[(a\cos^{2}\theta + b\sin^{2}\theta)x^2
+2(a-b)(\sin\theta\cos\theta)xy +
(a\sin^{2}\theta + b\cos^{2}\theta)y^{2}.\]
\end{question}
\begin{question}\label{C1.11}
Find all eigenvalues, and an orthonormal set of eigenvectors,
of the matrices
\[A=
\left(
\begin{matrix}
5&0&{\sqrt 3}\\0&3&0\\{\sqrt 3}&0&3
\end{matrix}
\right)
\ \text{and}
\ B=
\left(
\begin{matrix}
2&-1&-1\\-1&2&-1\\-1&-1&2
\end{matrix}
\right).
\]
Hence sketch the surfaces
\[5x^2+3y^2+3z^2+2{\sqrt 3}xz = 1
\ \text{and}
\ x^2+y^2+z^2-xy-yz-zx=1.\]
\end{question}
\begin{question}\label{C1.12}
Use Gaussian elimination to solve the following two
systems of integer \emph{congruences} modulo 7.
\begin{align*}
x+y+z&\equiv 3\\
x+2y+3z&\equiv 1\\
x+4y+2z&\equiv 1.
\end{align*}
Do the same for
\begin{align*}
x+y+6z&\equiv 2\\
x+2y+5z&\equiv 4\\
x+4y+3z&\equiv 1.
\end{align*}
Write down a third equation which makes the
system
\begin{align*}
x+y+6z&\equiv 2\\
x+2y+5z&\equiv 4
\end{align*}
insoluble and show that you have done so.
\end{question}
\noindent\hrulefill
\begin{question}\label{C1.13}
(a) Suppose that a $3\times3$ real matrix $A$
acting on ${\mathbb R}^{3}$ has eigenvalues
$\lambda$, $\mu$ and $\bar{\mu}$, where
$\lambda$ is real, $\mu$ is complex and non-real,
and $\bar{\mu}$ is the complex conjugate of $\mu$.
Suppose also that ${\mathbf u}$ is a real eigenvector
for $\lambda$ and that ${\mathbf v}$ is a complex
eigenvector for $\mu$, where
${\mathbf v} ={\mathbf v}_{1} +i{\mathbf v}_{2}$, and
${\mathbf v}_{1}$ and ${\mathbf v}_{2}$ are real vectors.
Show that ${\mathbf v}_{1} -i{\mathbf v}_{2}$ is a complex
eigenvector for $\bar{\mu}$.
Assume that the vectors ${\mathbf u}$,
${\mathbf v}_{1}$, ${\mathbf v}_{2}$ are linearly independent.
Show that $A$ maps the plane $\Pi$ in
${\mathbb R}^3$ spanned by
${\mathbf v}_{1}$ and ${\mathbf v}_2$ into itself,
and that $\Pi$ contains no eigenvectors of $A$.
(b) Illustrate the previous paragraph with the
case of a rotation of ${\mathbb R}^{3}$ of angle $\pi /4$
about the axis along ${\mathbf e}_1$.
(c) Illustrate (a) again by taking
\[A =
\left(
\begin{matrix}
4&{-5}&7\\
1&-4&9\\
-4&0&5
\end{matrix}
\right),
\]
\noindent
and show that in this case $\Pi$ has equation $2x-2y+z=0$.
\end{question}
\begin{question}\label{C1.14}
Let $\Sigma$ be the surface in ${\mathbb R}^{3}$ given by
\[2x^2 +2xy+4yz+z^2 = 1.\]
By writing this equation as
\[{\mathbf x}^{T}A{\mathbf x}=1,\]
with $A$ a real symmetric matrix, show that there
is an orthonormal basis such that, if we use
coordinates $(u,v,w)$ with respect to this new basis,
$\Sigma$ takes the form
\[\lambda u^2 + \mu v^2 + \nu w^2 = 1.\]
Find $\lambda$, $\mu$ and $\nu$ and hence find the
minimum distance between the origin and $\Sigma$.
[It is {\bf not} necessary to find the basis
explicitly.]
\end{question}
\begin{question}\label{C1.15}
(This is another way of proving
$\det AB=\det A\det B$. You may wish to stick to
the case $n=3$.)
If $1\leq r,s\leq n$, $r\neq s$ and $\lambda$ is real,
let $E(\lambda,r,s)$ be an
$n\times n$ matrix with
$(i,j)$ entry $\delta_{ij}+\lambda\delta_{ir}\delta_{js}$.
If $1\leq r\leq n$ and $\mu$ is real,
let $F(\mu,r)$ be an
$n\times n$ matrix with
$(i,j)$ entry $\delta_{ij}+(\mu-1)\delta_{ir}\delta_{jr}$.
(i) Give a simple geometric interpretation of the linear maps
from ${\mathbb R}^{n}$ to ${\mathbb R}^{n}$ associated with
$E(\lambda,r,s)$ and $F(\mu,r)$.
(ii) Give a simple account of the effect of pre-multiplying
an $n\times m$ matrix by $E(\lambda,r,s)$ and by $F(\mu,r)$.
What is the effect of post-multiplying an $m\times n$ matrix?
(iii) If $A$ is an $n\times n$ matrix, find
$\det(E(\lambda,r,s)A)$
and $\det(F(\mu,r)A)$ in terms of $\det A$.
(iv) Show that every $n\times n$ matrix is the product of
matrices of the form $E(\lambda,r,s)$ and $F(\mu,r)$
and a diagonal matrix with entries $0$ or $1$.
(v) Use (iii) and (iv) to show that, if $A$ and $B$ are
$n\times n$ matrices, then $\det A\det B=\det AB$.
\end{question}
\begin{question}\label{C1.16}
Show that a rotation about the $z$ axis through an
angle $\theta$ corresponds to the matrix
\[{\mathbf R}=\left(
\begin{matrix}
\cos\theta&-\sin\theta&0\\
\sin\theta&\cos\theta&0\\
0&0&1
\end{matrix}\right).\]
Write down a real eigenvector of ${\mathbf R}$ and give the
corresponding eigenvalue.
In the case of a matrix corresponding to a general rotation,
how can one find the axis of rotation?
A rotation through $45^{\circ}$ about the $x$-axis is followed
by a similar one about the $z$-axis. Show that the rotation
corresponding to their combined effect has its axis inclined
at equal angles
\[\cos^{-1}\frac{1}{\surd(5-2\surd 2)}\]
to the $x$ and $z$ axes.
\end{question}
\begin{question}\label{C1.17}\label{Euler onwards}
(i) If $\beta:{\mathbb R}^{n}\rightarrow{\mathbb R}^{n}$
is an orthogonal map which fixes two orthonormal vectors ${\mathbf e}_{1}$ and
${\mathbf e}_{2}$, show that, if ${\mathbf x}$ is perpendicular to
${\mathbf e}_{1}$ and ${\mathbf e}_{2}$, then $\beta{\mathbf x}$
is perpendicular to ${\mathbf e}_{1}$ and ${\mathbf e}_{2}$.
(ii) Use the ideas of Lemma~\ref{rotation} and the surrounding
lemmas to show that,
if $n\geq 3$, then there is an
orthonormal basis of ${\mathbb R}^{n}$ with respect to which
$\beta$ has matrix
\[\left(\begin{matrix}C&O_{2,n-2}\\O_{n-2,2}&B\end{matrix}\right)\]
where $O_{r,s}$ is an $r\times s$ matrix of zeros,
$B$ is an $(n-2)\times (n-2)$
orthogonal matrix and
\[C=\left(\begin{matrix}\cos\theta&-\sin\theta\\
\sin\theta&\cos\theta\end{matrix}\right)\]
for some real $\theta$.
(iii) Show that, if $n=4$, then, if $\beta$ is special orthogonal, we can find
an orthonormal basis of ${\mathbb R}^{4}$ with respect to which
$\beta$ has matrix
\[\left(\begin{matrix}
\cos\theta_{1}&-\sin\theta_{1}&0&0\\
\sin\theta_{1}&\cos\theta_{1}&0&0\\
0&0&\cos\theta_{2}&-\sin\theta_{2}\\
0&0&\sin\theta_{2}&\cos\theta_{2}
\end{matrix}\right),\]
for some real $\theta_{1}$ and $\theta_{2}$, whilst, if
$\beta$ is not special orthogonal, we can find
an orthonormal basis of ${\mathbb R}^{4}$ with respect to which
$\beta$ has matrix
\[\left(\begin{matrix}
-1&0&0&0\\
0&1&0&0\\
0&0&\cos\theta&-\sin\theta\\
0&0&\sin\theta&\cos\theta
\end{matrix}\right),\]
for some real $\theta$.
(iv) What happens if we take $n=5$? What happens for general $n$?
Prove your statements.
\end{question}
\section{Second exercise set}
Most of the exercises in these exercise sets
are taken from earlier sheets of Professor Beardon. In each case,
the first five or so exercises are intended to be short
and any exercises after the first twelve are for enthusiasts.
(The extra questions may come in handy for revision,
or your supervisor may choose a different selection
of questions or one of the extra questions such as
Exercise~\ref{Euler onwards} or~\ref{Vandermonde}
may catch your fancy.)
\begin{question}\label{C2.1}\label{Left right}
Throughout this question $(G,*)$ will be a set $G$
with a multiplication $*$ such that
$a*b\in G$ whenever $a,\,b\in G$ whilst
\[a*(b*c)=(a*b)*c\ \text{for all $a,\,b,\,c\in G$}.\]
(i) Suppose that there exist $e_{R}$ and $e_{L}$ in $G$ such that
\[a*e_{R}=e_{L}*a=a\ \text{for all $a\in G$}.\]
Show that $e_{R}=e_{L}$. How does this show that the unit in
a group is unique?
(ii) Suppose that there exists an $e\in G$ such that
$a*e=e*a=a$ for all $a\in G$. Show that, if $x\in G$ and we can find
$x_{R}$ and $x_{L}$ in $G$ such that
\[x*x_{R}=x_{L}*x=e,\]
then $x_{R}=x_{L}$. How does this show that the inverse
of any element in a group is unique?
(iii) Suppose now that $(G,*)$ is a group.
If $H$ is a non-empty subset of $G$ such that
$ab^{-1}\in H$ whenever $a,\,b\in H$, show
that $H$ is a subgroup of $G$.
\end{question}
\begin{question}\label{C2.2}
Let ${\mathbf Z}$ be the group of integers under addition.
What is the subgroup of ${\mathbf Z}$ generated
(i) by $530$ and $27$?
(ii) by $531$ and $27$?
\noindent[Recall that the subgroup generated by a subset $E$
of a group is the smallest subgroup of $G$ containing $E$.
Consider the greatest common divisor.]
\end{question}
\begin{question}\label{C2.3}
Show that if $H$ and $K$ are subgroups of a group $G$,
then $H \cap K$ is also a subgroup of $G$. Show also
that, if $H$ and $K$ have orders $p$ and $q$, respectively,
where $p$ and $q$ are coprime, then $H\cap K$ contains
only the identity element $e$ of $G$.
\end{question}
\begin{question}\label{C2.4}
Show that the set $G$ of matrices of the form
\[
\left(
\begin{matrix}
z&w\\-\bar{w}&\bar{z}
\end{matrix}
\right),
\]
where $z$ and $w$ are complex numbers with $|z|^{2}+|w|^{2}\neq 0$,
is a non-Abelian group under multiplication.
Show that the set $H$ of matrices of the form
\[
\left(
\begin{matrix}
z&w\\-\bar{w}&\bar{z}
\end{matrix}
\right),
\]
where $z$ and $w$ are complex numbers with $|z|^{2}+|w|^{2}=1$,
is a subgroup of $G$. Is $H$ a normal subgroup of $G$?
[Think determinants. $H$ is normal if $ABA^{-1}\in H$
whenever $A\in G$ and $B\in H$.] Is $H$ Abelian? Is $H$ infinite?
In each case, give reasons.
\end{question}
\begin{question}\label{C2.5}
Let $G$ be the set of complex numbers of
the form $\exp i\pi q$ with $q$ rational. Show that
$G$ is an Abelian group under multiplication. Show that $G$ is infinite
but that every element of $G$ is of finite order.
Can you find an infinite group
in which every element except the identity is
of infinite order? Give reasons.
\end{question}
\begin{question}\label{C2.6}
Let ${\mathcal P}$ be a solid triangular prism with an equilateral
triangle as cross-section and ends orthogonal to the longitudinal
axis of the prism (a `Toblerone' bar). Find the group of
rotations which leaves ${\mathcal P}$ invariant.
Can you show that it is isomorphic to a standard group?
[It may be helpful to proceed as follows. First find the
number of elements in the group. Now find generators
for the group and relations between them. Now see if you
can identify the group as isomorphic to one you already
know.]
Find the group of
rotations and reflections which leaves ${\mathcal P}$ invariant.
Can you show that it is isomorphic to a standard group?
\end{question}
\begin{question}\label{C2.7}
Suppose $G$ is a group in which every element
other than the identity has order $2$. By evaluating
$x(xy)^{2}y$ in two ways, show that $xy=yx$ for all
$x,\,y\in G$. If the identity $e$, $x$ and $y$
are all distinct, show that the set $\{e,\,x,\,y,\,xy\}$
is a subgroup of $G$ of order exactly $4$.
Use Lagrange's theorem to show that any group of order $2p$,
where $p$ is an odd prime, must contain an element of order $p$.
\end{question}
\begin{question}\label{C2.8}
Consider the four matrices
\[{\boldsymbol 1}=
\left(
\begin{matrix}
1&0\\0&1
\end{matrix}
\right),
\ {\mathbf i}=\left(
\begin{matrix}
i&0\\0&-i
\end{matrix}
\right),
\ {\mathbf j}=\left(
\begin{matrix}
0&1\\-1&0
\end{matrix}
\right),
\ {\mathbf k}=\left(
\begin{matrix}
0&i\\i&0
\end{matrix}
\right).
\]
Compute ${\mathbf i}^{2}$, ${\mathbf j}^{2}$, ${\mathbf k}^{2}$,
${\mathbf i}{\mathbf j}$, ${\mathbf j}{\mathbf k}$,
${\mathbf k}{\mathbf i}$,
${\mathbf j}{\mathbf i}$, ${\mathbf k}{\mathbf j}$,
${\mathbf i}{\mathbf k}$
and show that the four matrices generate a group ${\mathbf Q}$
of order $8$. Is the group ${\mathbf Q}$ Abelian?
How many subgroups
of order $4$ does it have?
\end{question}
\begin{question}\label{C2.9}
(a) Consider the functions $f:A\rightarrow B$, $g:B\rightarrow C$
and their composition $g\circ f:A\rightarrow C$ given by
$g\circ f(a)=g(f(a))$. Prove the following results.
(i) If $f$ and $g$ are surjective, then so is $g\circ f$.
(ii) If $f$ and $g$ are injective, then so is $g\circ f$.
(iii) If $g\circ f$ is injective, then so is $f$.
(iv) If $g\circ f$ is surjective, then so is $g$.
(b) Give an example where $g\circ f$ is injective and surjective
but $f$ is not surjective and $g$ is not injective.
(c) If any of your proofs of parts~(i) to~(iv) of~(a)
involved
contradiction, reprove them without such
arguments\footnote{Conway
refers to arguments of the form
`Assume $X$ is true but $Y$ is false. Since $X$ implies $Y$
it follows that $Y$ is true. This contradicts our original
assumption. Thus $X$ implies $Y$.' as
`absurd absurdums'.}.
(d) Have you given the simplest possible example in~(b)?
(If you feel that this is not a proper question, let us
ask instead for the smallest possible sets $A$ and $B$.)
\end{question}
\begin{question}\label{C2.10}
Three fixed lines $a$, $b$, $c$ are the sides of an
equilateral triangle, and ${\bf A}$, ${\bf B}$, ${\bf C}$
denote the operations of reflection in $a$, $b$, $c$
respectively. Describe the operations ${\bf AB}$, ${\bf CB}$,
${\bf CBAB}$. (You should use the composition law
$(fg)(x)=f(g(x))$.)
Show that there exists an infinite group $G$ containing
elements $p$, $q$, $r$ such that
(i) $G$ is generated by $p$, $q$, $r$;
(ii) $p^{2}=q^{2}=r^{2}=(qr)^{3}=(rp)^{3}=(pq)^{3}=e.$
\end{question}
\begin{question}\label{C2.11}
For any two sets $A$ and $B$ the symmetric difference
$A\triangle B$ of $A$ and $B$ is the set of elements in
\emph{exactly one} of $A$ and $B$.
Let $\Omega$ be a non-empty set and let $G$ be the set of subsets of
$\Omega$ (note that $G$ includes both the empty set
$\emptyset$ and $\Omega$).
Show that $G$ with the operation $\triangle$ is an Abelian group.
[\emph{Hint} : the identity element is likely to be either $\emptyset$
or $\Omega$ as no other `special' element presents itself.
\emph{Remark} : this is an example in which the associative
law is not entirely trivial.]
Let $\Omega=\{1,\,\dots,\,7\}$ and let
$A =\{1,\,2,\,3\}$, $B=\{3,\,4,\,5\}$, $C=\{5,\,6,\,7\}$.
Find $X$ in $G$ such that $A\triangle X \triangle B = C$.
\end{question}
\begin{question}\label{C2.12}
Let $C_{n}$ be the cyclic group with $n$
elements and $D_{2n}$ the dihedral group with $2n$
elements (i.e., the group of symmetries of the regular
$n$-gon\footnote{Observe my inability to keep to a consistent
choice between $D_{n}$ and $D_{2n}$.}). If $n$ is odd
and $f:D_{2n}\rightarrow C_{n}$ is a homomorphism,
show that $f(x)=e$ for all $x\in D_{2n}$.
What can we say if $n$ is even?
Find all the homomorphisms from the cyclic group $C_{n}$
of order $n$ generated by $a$, say, to $C_{m}$ the
cyclic group generated by $b$, say. If $n=m$, show that
there are $\phi(n)$ isomorphisms, where $\phi(n)$ is
the number of integers between $0$ and $n-1$ coprime
to $n$ (Euler's totient function).
\end{question}
\noindent\hrulefill
\begin{question}\label{C2.13}
(This question will be accessible to
those who are doing the course `Numbers and Sets',
it may or may not be accessible
to others.) State what is meant by an \emph{equivalence
relation} between members of a set $E$ and show that an
equivalence relation defines a partition of $E$ into
disjoint sets.
A relation $\sim$ between $m\times n$ matrices $A$ and $B$
is defined as follows: $A\sim B$ if there exists a non-singular
$m\times m$ matrix $P$ and a non-singular $n\times n$ matrix $Q$
such that
\[PAQ=B.\]
Show that this is an equivalence relation.
State a criterion for $A$ to be equivalent to
\[\left[\begin{array}{cc} I_{r}&0\\0&0
\end{array}\right],\]
where $I_{r}$ is the unit $r\times r$ matrix, and deduce the number
of equivalence classes.
\end{question}
\begin{question}\label{C2.14}
Giving adequate justification for your answers,
state which of the following sets of $n\times n$
matrices are groups under the usual operation
of matrix multiplication (in each case $n\geq 2$)
(i) the matrices $A$ with $a_{11}=1;$
(ii) the set consisting of the zero matrix only;
(iii) the matrices with a positive non-zero determinant;
(iv) the matrices with determinant zero;
(v) the matrices whose determinant is a non-zero integer;
(vi) the matrices $A$ such that $a_{ij}$ is an integer
for all $(i,j)$ and $\det A=1$.
\end{question}
\begin{question}\label{C2.15} We work in ${\mathbb R}^{2}$.
Let $R_{1}$ be the rectangle with vertices $(0,0)$, $(1,0)$,
$(1,2)$, $(0,2)$ and let $R_{2}$ be the rectangle
with vertices $(0,0)$, $(2,0)$,
$(2,1)$, $(0,1)$. Find all the isometries
which map $R_{1}$ to $R_{2}$ and show that
you have indeed found all of them.
\end{question}
\begin{question}\label{C2.16}
Throughout this question $G$ is a group and $K$ a non-empty subset of $G$.
(i) Give an example of a finite group $G$ and a non-empty subset
$K$ such that $x^{-1}Kx=K$ for all $x\in G$ but $K$ is not
a normal subgroup of $G$.
(ii) Show that if $K$ is finite and $Kx\subseteq K$
for some $x\in G$, then $Kx=K$.
(iii) Give an example to show that~(ii) is false if we drop
the condition $K$ finite.
(iv) If $Kx=K$ for all $x\in G$, show that $K=G$.
\end{question}
\begin{question}\label{C2.17}
From time to time the lecturer and your supervisor may
mention the notion of `a group generated by certain elements'
(eg in the hint to Exercise~\ref{C2.6}). It is not necessary
to make this notion precise at 1A~level. If you are interested,
this exercise shows how it can be made precise. The arguments are easy
but not worth doing unless you formulate them carefully.
(i) Let $G$ be a group and let $\{H_{\alpha}\,:\,\alpha\in A\}$
be a non-empty collection of subgroups of $G$. Show that
$\bigcap_{\alpha\in A}H_{\alpha}$ is a subgroup of $G$.
(ii) Let $G$ be a group and $X$ a non-empty subset of $G$.
Explain why the collection ${\mathcal G}_{X}$ of subgroups
of $G$ containing $X$ is non-empty. We call
\[\gen X=\bigcap_{H\in {\mathcal G}_{X}}H\]
the subgroup of $G$ generated by $X$.
(iii) Show that if $G$ and $X$ are as in~(ii), then
there is a unique subgroup $K$ of $G$ containing $X$
with the property
that, if $H$ is a subgroup of $G$ containing $X$,
then $H\supseteq K$. Show also that $K=\gen X$.
(iv) If $G$ and $X$ are as in~(ii), show that
$\gen X$ consists of the unit $e$ together with
all elements
\[\prod_{i=1}^{N}g_{i}^{\epsilon_{i}}=
g_{1}^{\epsilon_{1}}g_{2}^{\epsilon_{2}}\dots g_{N}^{\epsilon_{N}}\]
with $\epsilon_{i}=\pm 1$, $g_{i}\in X$ $[1\leq i\leq N]$
and $N\geq 1$.
(v) [In the remainder of this question we use the notion of generator
to bound the number of non-isomorphic groups of order $n$.
You should worry less about dotting i's and crossing t's.]
If $E$ contains $n$ elements explain why there are
exactly $n^{n^{2}}$ distinct functions $f:E\times E\rightarrow E$
and use this fact to show that there are at most
$n^{n^{2}}$ non-isomorphic groups of order $n$.
(vi) If $H$ is a subgroup of a finite group $G$ and
$x\notin H$ show that $\{x\}\cup H$ generates
a subgroup of order at least twice the order of $H$.
Deduce that a group of order $n$ has a set of
generators with at most $\log_{2} n$ elements.
(That is to say, we can find $X$ a subset of $G$
with at most $\log_{2} n$ elements and
$\gen X=G$. We define $\log_{2} n$ by
the equation $2^{\log_{2} n}=n$.)
(vii) Suppose that $X$ generates a group $G$. Explain
how, given the values of $xg$ and $x^{-1}g$ for
every $x\in X$ and $g\in G$, we may compute $uv$
for every $u,\,v\in G$. Deduce that there are at most
$n^{2n\log_{2}n}$ non-isomorphic groups of order $n$.
\end{question}
\section{Third exercise set}
Most of the exercises in these exercise sets
are taken from earlier sheets of Professor Beardon. In each case,
the first five or so exercises are intended to be short
and any exercises after the first twelve are for enthusiasts.
(The extra questions may come in handy for revision,
or your supervisor may choose a different selection
of questions or one of the extra questions such as
Exercise~\ref{Euler onwards} or~\ref{Vandermonde}
may catch your fancy.)
\begin{question}\label{C3.1}
Show that if a group $G$ is generated by two elements
$a$ and $b$, where $a^{-1}ba=b^2$ and
$b^{-1}ab = a^2$, then $G$ contains only one element.
[Recall that `$G$ is generated by two elements $a$ and $b$'
means that every element of $G$ is the product of terms of the form
$a$, $b$, $a^{-1}$ and $b^{-1}$.]
\end{question}
\begin{question}\label{C3.2}
The dihedral group $D_{2n}$ is the
full symmetry group of a regular plane $n$-gon.
Show that, if the integer $m$ divides $2n$, then $D_{2n}$
has a subgroup of order $m$.
If $n\geq m\geq 3$, show that $D_{2m}$ is isomorphic to a subgroup
of $D_{2n}$ if and only if $m$ divides $n$.
\end{question}
\begin{question}\label{C3.3}
Show that the set of real non-singular
$3\times 3$
upper-triangular\footnote{If you come across a word which you do not know
like `upper-triangular' you have various choices. You
can not do the question OR you can ask a friend
OR go and look in the index in the algebra books
in your college library. Which of these three choices
is the stupidest? Actually an upper triangular matrix $(a_{ij})$
is one with $a_{ij}=0$ whenever $i>j$ i.e., one with all elements
below the diagonal zero.} matrices form a group under
matrix multiplication. Does this group contain any elements
of order two which are not diagonal matrices?
\end{question}
\begin{question}\label{C3.4} Let $G$ be the group of orthogonal $2\times 2$ real
matrices and let $N$ be a normal subgroup of $G$ that contains
a reflection in some line through the origin. Show that $N$ contains
all reflections and deduce that $N=G$.
\end{question}
\begin{question}\label{C3.5} Show that the set of real $2\times 2$
upper triangular matrices of positive determinant is a group
$G$ under matrix multiplication. Show that the map
$\theta$ given by
\[\theta
\left(
\begin{matrix}
a&b\\0&d
\end{matrix}
\right)
=\log ad
\]
is a homomorphism of $G$ onto the additive group ${\mathbb R}$.
What is the kernel $K$ of $\theta$?
\end{question}
\begin{question}\label{C3.6}
Let $G$ be the set of all $3\times 3$
real matrices of determinant $1$ of the form
\[
\left(
\begin{matrix}
a&0&0\\
b&x&y\\
c&z&w
\end{matrix}
\right).
\]
Show that $G$ is a group under matrix multiplication.
Find\footnote{That is
to say, guess, and
then verify that your guess is correct. Guessing is
an essential part of mathematics. Very good guessers
are very good mathematicians (provided they understand
the difference between a guess and a proof).} a homomorphism from $G$
onto the group of all non-singular $2\times 2$ real matrices
and find its kernel.
\end{question}
\begin{question}\label{C3.7}
Let ${\mathbb Z}$, ${\mathbb Q}$
and ${\mathbb R}$ be the additive groups of integers,
rational and real numbers respectively. Show that every element
of the quotient group ${\mathbb Q}/{\mathbb Z}$ has finite order.
Show that every element
of the quotient group ${\mathbb R}/{\mathbb Q}$
(apart from the identity) has infinite order.
Show that some elements of ${\mathbb R}/{\mathbb Z}$
have infinite order and some non-identity elements do not.
\end{question}
\begin{question}\label{C3.8}
The group of $2\times 2$ real non-singular
matrices is the General Linear Group $GL(2,{\mathbb R})$;
the subset of $GL(2,{\mathbb R})$ consisting of matrices of
determinant $1$ is called the Special Linear Group $SL(2,{\mathbb R})$.
Show that $SL(2,{\mathbb R})$ is, indeed, a subgroup of
$GL(2,{\mathbb R})$ and that it is, in fact, normal. Show that
the quotient group $GL(2,{\mathbb R})/SL(2,{\mathbb R})$
is isomorphic to the multiplicative group of non-zero real numbers.
[The neatest way to do this question is to reflect on
the isomorphism theorem (Theorem~\ref{isomorphism theorem}).]
\end{question}
\begin{question}\label{C3.9}
Let $G$ and $H$ be groups and $\phi:G\rightarrow H$
a homomorphism with kernel $K$. Show that, if
$K=\{e,a\}$, then $x^{-1}ax=a$ for all $x\in G$.
Show that:--
(i) There is a homomorphism from $O(3)$, the orthogonal group
of $3\times 3$ real matrices, onto a group of order $2$
with kernel the special orthogonal group $SO(3)$.
(ii) There is a homomorphism from $S_{3}$
the symmetric group on $3$ elements to a group of order $2$
with a kernel of order $3$.
(iii) There is a homomorphism from $O(3)$ onto $SO(3)$
with kernel of order $2$.
(iv) There is no homomorphism from $S_{3}$ to a group of order $3$
with a kernel of order $2$.
\end{question}
\begin{question}\label{C3.10}
For a combinatorialist a \emph{graph}
is a finite set of points called \emph{vertices}
and some \emph{edges}. Each edge joins two vertices
and there is at most one edge $[ab]=[ba]$ joining any two
vertices $a$ and $b$.
(Think of airports with at most one route joining
any two airports.) Two such graphs with vertices labelled
$1$ to $6$ are shown in Figure~\ref{Graphs $A$ and $B$}.
\begin{figure}[h]
\includegraphics{Algdiag.eps}
\caption{Graphs $A$ and $B$}\label{Graphs $A$ and $B$}
\end{figure}
\noindent Graph $A$ has edges $[12]$, $[23]$, $[34]$, $[45]$, $[56]$,
$[61]$, $[35]$, $[31]$ and $[51]$.
\noindent Graph $B$ has edges $[12]$, $[23]$, $[34]$, $[45]$, $[56]$,
$[61]$, $[35]$, $[26]$, $[63]$ and $[52]$.
A \emph{symmetry} $\rho$ of the graph is a permutation of $\{1,\dots,6\}$
such that $(\rho a,\rho b)$ is an edge if and only if $(a,b)$ is
an edge. Show that the symmetries of a graph form
a subgroup of $S_{6}$.
For each of graphs $A$ and $B$
\ (a) find the symmetry group of the graph;
\ (b) find the orbit and stabiliser for each vertex;
\ (c) verify the orbit-stabiliser theorem for each vertex.
\end{question}
\begin{question}\label{C3.11}
Let $G$ be a finite group and $X$ the set of its subgroups.
Show that $g(L)=gLg^{-1}$ [$g\in G$, $L\in X$] defines an
action of $G$ on $X$. If $H$ is a proper subgroup of $G$
show that the orbit of $H$ has at most $|G|/|H|$ elements
and, by considering overlapping, or otherwise, show that there
is an element of $G$ which does not belong to any
conjugate of $H$.
\end{question}
\begin{question}\label{C3.12}
In each case, give reasons for your answer.
(i) Is it true that, if a finite group $G$ acts on a finite
set $X$, then every $g\in G$
has a fixed point? [Recall that $x$ is a fixed point for $g$
if $gx=x$.]
(ii) Is it true that, if a finite group $G$ with more than one element
acts faithfully on a finite
set $X$ with more than one element, then there exists a $g\in G$
with no fixed point?
(iii) Can an infinite group have a finite subgroup with more
than one element?
(iv) Let $a$ and $b$ be elements of an Abelian group. If $a$
and $b$ have order 2, what are the possible orders of $ab$?
(v) Is every subgroup of a normal subgroup of a group $G$
itself normal in $G$?
(vi) Let $G_{1}$ and $G_{2}$ be arbitrary groups. Does there
exist a group $G$ with subgroups $H_{1}$ and $H_{2}$ such that
$H_{1}$ is isomorphic to $G_{1}$ and $H_{2}$ is isomorphic
to $G_{2}$?
(vii) [For those who have done the course `Numbers and Sets'
or know about countability
from other sources.]
Does there exist an uncountable set of finite
groups no two of which are isomorphic?
\end{question}
\noindent\hrulefill
\begin{question}\label{C3.13}
Let $G$ be a group acting faithfully on a finite set $X$.
(i) We say that $G$ acts transitively on $X$
if there is only one orbit. Show that $G$
acts transitively if and only if given $x,\,y\in X$
we can find a $g\in G$ with $gx=y$.
(ii) Suppose that $G$ acts transitively on $X$.
Define a function $T:G\times X\rightarrow {\mathbb Z}$
by $T(g,x)=1$ if $gx=x$, $T(g,x)=0$ otherwise.
By evaluating $\sum_{x\in X}\sum_{g\in G}T(g,x)$
in two different ways, show that
\[\frac{1}{|G|}\sum_{g\in G}I(g)=1,\]
where $I(g)$ is the number of elements fixed by $g$.
(iii) Deduce that, if $G$ acts transitively on $X$
and $X$ has more than one element,
then there must exist a $g\in G$ with no fixed point.
(iv) Suppose now that we do not assume that $G$
acts transitively. Find and prove a formula
along the lines of~(ii) for the number $N$ of distinct
orbits.
\noindent[The formula you find is called the Cauchy--Frobenius
formula. If your supervisor is a combinatorialist or a group
theorist you may expect an impassioned speech on the many
uses of this result, but, alas, there is not room
in 1A for all that we would like to teach.]
\end{question}
\begin{question}\label{C3.14}
If $G$ is a group, we call an isomorphism
$\alpha:G\rightarrow G$ an \emph{automorphism}. Show that
the automorphisms of $G$ form a group under composition.
Consider ${\mathbb Q}$ (the rationals) as an additive group.
Show that, if $r$ and $s$ are non-zero rationals, there is a
unique automorphism $\alpha$ with $\alpha(r)=s$.
Deduce that the group of automorphisms of ${\mathbb Q}$ is
isomorphic to the multiplicative group of
${\mathbb Q}\setminus\{0\}$.
\end{question}
\begin{question}\label{C3.15} You are skiing on the border
of Syldavia. By mistake you cross into Borduria and are arrested.
The border guard turns out to be an old Trinity man and
agrees to let you go provided that you prove you are indeed
a mathematician by classifying all groups of order $10$.
Do so.
\end{question}
\begin{question}\label{C3.16} (i) Consider the collection
${\mathcal A}$
of maps $T:{\mathbb R}^{2}\rightarrow{\mathbb R}^{2}$
given by
\[T{\mathbf x}=\alpha({\mathbf x})+{\mathbf u}\]
where $\alpha$ is a non-singular linear map
(i.e. $\alpha\in GL({\mathbb R}^{2})$)
and ${\mathbf u}\in{\mathbb R}^{2}$.
Show that ${\mathcal A}$ forms a group under composition.
(ii) Show that the set of isometries, that is to say,
$T$ such that
\[\|T{\mathbf x}-T{\mathbf y}\|=\|{\mathbf x}-{\mathbf y}\|,\]
for all ${\mathbf x},\,{\mathbf y}\in{\mathbb R}^{2}$
forms a subgroup of ${\mathcal A}$. [You may assume
that any isometry which fixes $\boldsymbol 0$
is linear.]
(iii) In what follows we seek to characterise
the set ${\mathcal B}$ of $T\in {\mathcal A}$
such that $T^{2}=I$, the identity map. Suppose
\[T{\mathbf x}=\alpha({\mathbf x})+{\mathbf u}\]
where $\alpha\in GL({\mathbb R}^{2})$
and ${\mathbf u}\in{\mathbb R}^{2}$.
Show that $T\in {\mathcal B}$ if and only
if $\alpha^{2}=I$ and $\alpha{\mathbf u}=-\mathbf u$.
(iv) Suppose that $\alpha\in GL({\mathbb R}^{2})$
and $\alpha^{2}=I$. Show that either $\alpha=I$,
or $\alpha=-I$, or there exists a basis ${\mathbf f}_{1}$,
${\mathbf f}_{2}$ with respect to which
$\alpha$ has matrix
\[\left(\begin{matrix} -1&0\\0&1\end{matrix}\right).\]
(v) Suppose that $\alpha\in GL({\mathbb R}^{2})$
and $\alpha^{2}=I$. Show that either $\alpha=I$,
or $\alpha=-I$, or there exists an
orthonormal basis ${\mathbf e}_{1}$,
${\mathbf e}_{2}$ with respect to which
$\alpha$ has matrix
\[\left(\begin{matrix} -1&k\\0&1\end{matrix}\right)\]
for some real $k$.
(vi) Show that $T\in {\mathcal B}$ if and only if
\[T{\mathbf x}={\mathbf x}\]
for all ${\mathbf x}\in{\mathbb R}^{2}$,
or
\[T{\mathbf x}=-{\mathbf x}+{\mathbf u}\]
for all
${\mathbf x}\in{\mathbb R}^{2}$ and some fixed
${\mathbf u}\in{\mathbb R}^{2}$, or
if, after an appropriate rotation of axes,
\[T\left(\begin{matrix}x\\y\end{matrix}\right)
=\left(\begin{matrix} -1&k\\0&1\end{matrix}\right)
\left(\begin{matrix}x\\y\end{matrix}\right)
+\left(\begin{matrix}u\\0\end{matrix}\right)\]
for some fixed $k,\,u\in{\mathbb R}$.
(vii) Show that, if $T\in {\mathcal B}$,
then $T$ leaves some line $l$ through the origin fixed
(that is to say, $Tl=l$).
(viii) Is ${\mathcal B}$ a subgroup of ${\mathcal A}$?
(ix) Show that, if $T$ is an isometry and $T^{2}=I$,
then $T$ is a reflection in some line (not necessarily
through the origin), or a rotation
through $\pi$ about some point (not necessarily
the origin) or the identity map.
\end{question}
\section{Fourth exercise set}
Most of the exercises in these exercise sets
are taken from earlier sheets of Professor Beardon. In each case,
the first five or so exercises are intended to be short
and any exercises after the first twelve are for enthusiasts.
(The extra questions may come in handy for revision,
or your supervisor may choose a different selection
of questions or one of the extra questions such as
Exercise~\ref{Euler onwards} or~\ref{Vandermonde}
may catch your fancy.)
\begin{question}\label{C4.1}
Let $X=\{0,\,1,\,2,\,\dots,\,16\}$.
Express each of the following permutations of $X$ as a product of
cycles and decide whether it is even or odd.
(a) The function defined by $f_{1}(x)\equiv 2x\mod{17}$.
(b) The function defined by $f_{2}(x)\equiv x+5\mod{17}$.
(c) The function defined by $f_{3}(x)\equiv 3x\mod{17}$.
Explain why the function defined by $f_{4}(x)\equiv x^{2}\mod{17}$
is not a permutation.
\end{question}
\begin{question}\label{C4.2}
What is the largest possible order of an element in $S_{5}$?
What is the largest possible order of an element in $S_{9}$?
What is the largest possible order of an element in $S_{16}$?
[You may need to run through several possibilities.]
Show that every element in $S_{10}$ of order $14$ is odd.
\end{question}
\begin{question}\label{C4.3}
Show that any subgroup of $S_{n}$
which is not contained in $A_{n}$ contains an equal
number of odd and even elements.
\end{question}
\begin{question}\label{C4.4}
Let $g(z) = (z+1)/(z-1)$.
By considering the points $g(0)$, $g(\infty )$,
$g(1)$ and $g(i)$, find the image of the real axis
${\mathbb R}$ and the imaginary axis (in each case
with $\infty$ attached) under $g$. What is the image
under $g$ of $\{x+iy\,:\,x>0,\,y>0\}$?
\end{question}
\begin{question}\label{C4.5}
Express the M\"{o}bius transformation
$z\mapsto(2z+3)/(z-4)$ as the composition of maps
of the form $z\mapsto az$, $z\mapsto z+b$ and
$z\mapsto 1/z$. Hence show that $z\mapsto(2z+3)/(z-4)$
maps the circle $|z-2i|=2$ onto the circle
\[\left|z + \left(\frac{6+11i}{8}\right) \right| =\frac{11}{8}.\]
\end{question}
\begin{question}\label{C4.6}
(i) Show that, if $z_{1}$, $z_{2}$, $z_{3}$ and
$w_{1}$, $w_{2}$, $w_{3}$ are two triples of distinct points
in ${\mathbb C}\cup\{\infty\}$, there exists a unique M\"{o}bius
transformation that takes $z_{j}$ to $w_{j}$ [$j=1,\ 2,\ 3$].
Hence show that 4 distinct points
$z_{1}$, $z_{2}$, $z_{3}$ and $z_{4}$ lie on
a circle or a straight line if and only if the cross ratio
$CR( z_{1},z_{2},z_{3},z_{4})$ is real.
(ii) If
$z_{1}$, $z_{2}$, $z_{3}$ and $z_{4}$ are distinct and
$CR( z_{1},z_{2},z_{3},z_{4})=\lambda$ write down
the possible values of
$CR( z_{\sigma 1},z_{\sigma 2},z_{\sigma 3},z_{\sigma 4})$
when $\sigma \in S_{4}$, proving your assertion.
(iii) Use cross-ratios to prove Ptolemy's theorem:--
`For any quadrilateral whose vertices lie on a circle
the product of the lengths of the diagonals is the sum
of the products of the lengths of pairs of opposite
sides.'
\end{question}
\begin{question}\label{C4.7}
Let $G$ be the subgroup of those M\"obius
transformations which map the set $\{ 0,1,\infty \}$ onto itself.
Show that the only elements of $G$ are the functions
\begin{small}
\[f_{0}(z)=z,\ f_{1}(z) = 1-z,\ f_{2}(z) =\frac{1}{z},
\ f_{3}(z) =\frac{z}{z-1},\ f_{4}(z) =\frac{1}{1-z},
\ f_{5}(z) =\frac{z-1}{z}.\]
\end{small}
To which standard group is $G$ isomorphic?
Find the group of M\"obius transformations
which map the set $\{ 0,2,\infty \}$ onto itself.
\end{question}
\begin{question}\label{C4.8}
Show that if $|a|\neq1 $ the map
\[T_{a}z=\frac{z-a}{a^{*}z-1}\]
takes the unit circle $\{z:\ |z|=1\}$ to itself.
What are $T_{a}0$ and $T_{a}a$? What does $T_{a}$ take the unit disc
$D=\{z:\ |z|<1\}$ to (i) if $|a|<1$, (ii) if $|a|>1$?
What happens if $|a|=1$?
Show that the only M\"{o}bius map $S$ with $S0=0$, $S1=1$ and
$SD=D$ is the identity map. Hence, or otherwise, show that
the most general M\"{o}bius map $R$ with $RD=D$ is given by
\[Rz=\exp(i\theta)\frac{z-a}{a^{*}z-1},\]
where $\theta$ is real and $|a|<1$.
Find the most general M\"{o}bius map which takes
the half-plane $\{z:\ {\rm Im}(z)>0\}$ to the unit disc $D$.
(You may leave your result in the form of a composition.)
\end{question}
\begin{question}\label{C4.9}
Show that every M\"{o}bius
transform has at least one fixed point.
Identify the stabiliser $G$ of $\infty$. Find all the $T\in G$
which have only one fixed point and show that
\[T^{n}z\rightarrow\infty\ \ \ \mbox{as $|n|\rightarrow\infty$}\]
for all such $T$ and all $z\in {\mathbb C}$. Hence show that,
if $S$ is any M\"{o}bius map with a unique fixed point
$z_{0}$, then
\[S^{n}z\rightarrow z_{0}\ \ \ \mbox{as $|n|\rightarrow\infty$}\]
for all $z\in {\mathbb C}\cup\{\infty\}$.
Identify the subgroup $H$ of M\"{o}bius maps which leave
$0$ and $\infty$ fixed and the subgroup $H'$ of M\"obius
maps which leave
the set $\{0,\infty\}$ fixed. Describe, in general terms,
the orbits under $T\in H$ (that is to say, the orbit
of a point $z$ under the cyclic group generated by $T$)
if $T$ does not leave the circle
$\{z\in{\mathbb C}:\ |z|=1\}$ fixed. Give an example of
a M\"{o}bius map for which the orbit of every point with
two exceptions has four elements.
Show that every M\"{o}bius
transform, with exactly one exception,
has exactly one fixed point or exactly two
fixed points.
\end{question}
\begin{question}\label{C4.10}
The \emph{cycle type} of an element
$\sigma$ of the symmetric group $S_{n}$ is defined to be
the collection of lengths of disjoint cycles that form
$\sigma$. (For example $(1754)(268)(3)(9)\in S_{9}$
has cycle type $4$, $3$, $1$, $1$.)
(i) Show that $\sigma_{1}$ and $\sigma_{2}$ in $S_{n}$
have the
same cycle type if and only if there exists
a $\tau\in S_{n}$ such that $\sigma_{1}=\tau^{-1}\sigma_{2}\tau$.
(ii) Find the number of elements of each cycle type
in $S_{5}$. Which of them belong to $A_{5}$?
\end{question}
\begin{question}\label{C4.11}
(i) Show that $S_{n}$ is generated by
transpositions of the form $(1j)$ with $2\leq j\leq n$.
(ii) Show that $S_{n}$ is generated by
transpositions of the form $(j-1\,j)$ with $2\leq j\leq n$.
(iii) Show that $S_{n}$ is generated by the two elements
$(12)$ and $(123\dots n)$.
(iv) For which values of $n$ is $S_{n}$ generated by a single
element? Prove your answer.
(v) Calculate the product $(12)(13)$ in $S_{n}$ for $n\geq 3$.
Calculate the product $(123)(124)$ in $S_{n}$ for $n\geq 4$.
Show that, if $n\geq 3$, $A_{n}$ is generated by the set of all
cycles of length $3$ in $S_{n}$. What happens if $n=2$ or $n=1$?
\end{question}
\begin{question}\label{C4.12}\label{wax}
By dint of constant practice, the well known
man about town Simon Wendel Indler has reached
the point where, given a pack of $2n$ cards, he can
execute a `perfect shuffle' in which the card
in $r$th position in the pack moves to the $2r$th position
for $1\leq r\leq n$ and to the $2(r-n)-1$st position
for $n+1\leq r\leq 2n$.
(i) By using the cycle notation,
find how many shuffles it takes him to return the
pack to its initial state when $n=1,\,2,\,3,\,4,\,5,\,6,\,7$.
Are there any remarks about particular things for
particular $n$ that might be helpful to Mr Indler?
Remember that even a small amount of extra information
gives a card player a substantial advantage.
(ii) Why does Mr Indler prefer a shuffle in which
the card
in $r$th position in the pack moves to the $2r-1$th position
for $1\leq r\leq n$ and to the $2(r-n)$st position
for $n+1\leq r\leq 2n$? (This is called an `out-shuffle'.
The shuffle described in the first paragraph is called an
`in-shuffle'.)
(iii) Show that the in-shuffle can be described using modular
arithmetic by saying that the card in position $r$ goes to
position $k$ where
\[k\equiv 2r\mod{2n+1}.\]
Explain why
the pack returns to its original
order after $\phi(2n+1)$ shuffles where $\phi$ is Euler's
totient function. Apply this result to a standard pack of $52$
cards.
(iv) Now consider the out-shuffle. Show that, if we ignore the first
and last cards and renumber the remainder so that what was the
$r+1$st card is now the $r$th card, then the effect
of the out-shuffle can be described using modular
arithmetic by saying that the card in position $r$ goes to
position $k$ where
\[k\equiv 2r\mod{2n-1}.\]
Explain why the pack returns to its original
order after $\phi(2n-1)$ shuffles where $\phi$ is Euler's
totient function. Apply this result to a standard pack of $52$
cards.
(v) Show that, in fact, out-shuffling returns a standard pack of $52$
cards to its original state in 8 shuffles (making it a particularly
useful shuffle for Mr Indler and for stage magicians). Why is
this consistent with the result of (iv)?
(vi) Show that in-shuffling does require at least $52$ shuffles
to return the pack to its original order. (You should only need
$26$ easy calculations, or less, to show this. Cunning can replace
computation but thinking of cunning tricks takes effort.)
(vii) Is it true that every orbit for an in-shuffle is the
same size? Is it true that every orbit for an in-shuffle has
a size dividing the size of the orbit of the first card?
Can you give an infinite set of integers $n$
such that every orbit for an in-shuffle is the
same size?
Give reasons.
\vspace{1\baselineskip}
\noindent
[Remark: Provided that your in-shuffling is not quite accurate,
in-shuffling is a very good way of randomising packs. It has
been shown that seven imperfect in-shuffles are sufficient.
The study of imperfect shuffles only began a few years ago.
It requires probability theory and group theory.]
\end{question}
\noindent\hrulefill
\begin{question}\label{C4.13}
Consider the following groups:--
$({\mathbb R}\setminus\{0\},\times)$ the non-zero
reals under multiplication, $({\mathbb R}^{+},\times)$ the strictly
positive reals under multiplication, $({\mathbb R},+)$ the reals
under addition, $({\mathbb Z},+)$ the integers under addition,
$({\mathbb R}/{\mathbb Z},+)$, $SO({\mathbb R},2)$ the group
of $2\times 2$ orthogonal real matrices of determinant $1$
under matrix multiplication,
$O({\mathbb R},2)$ the group of orthogonal real matrices
under matrix multiplication.
Establish which are isomorphic and which are
not.
\end{question}
\begin{question}\label{C4.14}
Let ${\mathcal M}$ be the group of M\"{o}bius maps
acting on ${\mathbb C}\cup\{\infty\}$. If $\alpha(z)=z^{-1}$
and $\beta z=z+2$, show that $0<|\alpha\beta^{r}(z)|<1$
whenever $|z|<1$ and $r$ is a non-zero integer.
Deduce that, if $t$ is a strictly positive integer
and $r_{1}$, $r_{2}$, \dots, $r_{t}$ are non-zero integers,
then $\alpha\beta^{r_{1}}\alpha\beta^{r_{2}}\dots\alpha\beta^{r_{t}}$
does not lie in the stabiliser of $0$.
Does the group generated by $\alpha$ and $\beta$
contain a non-identity element which lies in the stabiliser of $0$?
Give a proof or counter-example.
\end{question}
\begin{question}\label{C4.15}
If $H$ is a subgroup of a finite group $G$ and $G$ has twice
as many elements as $H$, show that $H$ is normal in $G$.
How many elements does the group $G_{1}$ of isometries of the
cube have, how many does the group $G_{2}$ of rotations of
a cube have? How many elements does the group $G_{3}$ of
isometries of the regular tetrahedron have, how many does
the group $G_{4}$ of rotations of a regular tetrahedron have?
Give reasons.
By considering the effect of rotations on the diagonals
of the cube, or otherwise, show that $G_{2}$ is isomorphic
to $S_{4}$. Give, with reasons, similar isomorphisms of
$G_{3}$ and $G_{4}$ with permutation groups or subgroups.
By colouring opposite faces of the cube in the same colour
but otherwise using different colours find a surjective
homomorphism from $G_{2}$ to $S_{3}$ and so a surjective
homomorphism from $S_{4}$ to $S_{3}$. Deduce that $S_{4}$
has a normal subgroup with 4 elements.
\end{question}
\begin{question}\label{C4.16} For each integer $c$,
define $f_{c}:{\mathbb Z}\rightarrow{\mathbb Z}$ by
$f_{c}(k)=k+(-1)^{k}c$. Show that $\{f_{c}\,:\,c\in{\mathbb Z}\}$
is a subgroup of the group of permutations of ${\mathbb Z}$.
\end{question}
\begin{question}\label{C4.17}\label{Fix more direct}
Here is another proof of Lemma~\ref{Fix more}.
(i) Show that every M\"{o}bius map has a fixed point.
Deduce that every M\"{o}bius map is conjugate to a M\"{o}bius map
which fixes $\infty$.
(ii) Show that every M\"{o}bius map
which fixes $\infty$ either has another fixed point
or has the form $z\mapsto z+a$.
Deduce that every M\"{o}bius map is either conjugate to a M\"{o}bius map
which fixes $\infty$ and $0$, or is conjugate to a map
of the form $z\mapsto z+a$.
(iii) Show that every M\"{o}bius map is either the identity
or is conjugate to a map of the form $z\mapsto z+1$
or is conjugate to a map of the form $z\mapsto \lambda z$
with $|\lambda|\geq 1$. Why is the map $z\mapsto z+1$
not conjugate to a map of the form $z\mapsto \lambda z$
with $|\lambda|\geq 1$?
(iv) Suppose that $Tz=\lambda z$, $Sz=\mu z$, $R$ is
a M\"{o}bius map and $T=RSR^{-1}$. Show that $Rz=az$
or $Rz=a/z$ (for some $a\neq 0$). Deduce that
if $|\mu|,\,|\lambda|\geq 1$ and $\mu,\,\lambda\neq 1$ then
either $\lambda=\mu$
or $|\lambda|=|\mu|=1$ and $\lambda=\mu^{*}$.
\end{question}
\begin{question} {\bf (The Vandermonde determinant.)}%
\label{C4.18}\label{Vandermonde}
(i) Consider the function
$F:{\mathbb R}^{3}\rightarrow{\mathbb R}$ given by
\[F(x,y,z)=\det
\left(\begin{matrix}1&1&1\\x&y&z\\x^{2}&y^{2}&z^{2}\end{matrix}\right).\]
Explain why $F$ is a polynomial of degree $3$. By considering
$F(x,x,z)$, show that $F$ has $x-y$ as a factor. Explain why
$F(x,y,z)=A(x-y)(y-z)(z-x)$ for some constant $A$. By looking
at the coefficient of $yz^{2}$, or otherwise, show that
$F(x,y,z)=(x-y)(y-z)(z-x)$.
(ii) Consider the $n\times n$ matrix $V$ with $v_{rs}=x_{s}^{r-1}$.
Show that, if we set
\[F(x_{1},x_{2},\dots,x_{n})=\det V,\]
then
\[F(x_{1},x_{2},\dots,x_{n})=\prod_{i>j}(x_{i}-x_{j}).\]
(iii) If $\sigma\in S_{n}$ and all the $x_{r}$ are distinct,
we set
\[\zeta(\sigma)=\frac{F(x_{\sigma(1)},x_{\sigma(2)},\dots,x_{\sigma(n)})}
{F(x_{1},x_{2},\dots,x_{n})}.\]
Use the rules for calculating determinants to find $\zeta(\sigma)$
when $\sigma$ is a transposition and when $\sigma$ is the product
of $k$ transpositions.
\noindent[Part~(iii) shows that we could define signature
using determinants but it is more common to define determinants
using signature and we must be careful to avoid circularity.]
\end{question}
\begin{question}\label{C4.19}
(i) Show that, if $n\geq 5$, then $S_{n}$ is generated by $4$-cycles
(that is to say, cycles of length $4$).
Can the identity be written as the product of
an odd number of $4$-cycles?
(ii) Let $n\geq 3$ and let $X$ be the subset of $S_{n}$
consisting of those $\sigma$ with $\sigma 1=2$.
Show that $S_{n}$ is generated by $X$. Can we define
a function $\Omega:S_{n}\rightarrow\{-1,1\}$ by
taking $\Omega(\sigma)=(-1)^{m}$ if $\sigma$
is the product of $m$ elements of $X$
and their inverses?
(iii) If $G$ is an Abelian group and $T:S_{n}\rightarrow G$
is a homomorphism, what can you say about the image $T(S_{n})$?
\end{question}
\end{document}