\documentclass[12pt,a4paper]{amsart}
\newtheorem{Lemma}{Lemma}
\newtheorem{Theorem}[Lemma]{Theorem}
\newtheorem{Definition}[Lemma]{Definition}
\newtheorem{Axiom}[Lemma]{Axiom}
\newtheorem{Example}[Lemma]{Example}
\newtheorem{Corollary}[Lemma]{Corollary}
\newtheorem{Remark}[Lemma]{Remark}
\begin{document}
\title{Results in First Part of Methods and Calculus}
\author{T.W.K\"{o}rner}
\maketitle
\begin{Definition}
Let ${\bf a}_{n},{\bf a}\in{\mathbb R}^{k}$. We say that
${\bf a}_{n}\rightarrow {\bf a}$ as $n\rightarrow\infty$, if
given $\epsilon>0$, we can find $N(\epsilon)$ such that
$||{\bf a}_{n}-{\bf a}||<\epsilon$ for all $n>N(\epsilon)$.
\end{Definition}
\begin{Theorem}
(i) If ${\bf a}_{n}\rightarrow {\bf a}$ and
${\bf b}_{n}\rightarrow {\bf b}$
in ${\mathbb R}^{k}$ then
${\bf a}_{n}+{\bf b}_{n}\rightarrow {\bf a+b}$ as $n\rightarrow\infty$.
(ii) If $a_{n}\rightarrow a$ and $b_{n}\rightarrow b$ in
${\mathbb R}$ (or ${\mathbb C}$) then $a_{n}b_{n}\rightarrow ab$
as $n\rightarrow\infty$.
(iii) If $a_{n}\rightarrow a$ as $n\rightarrow\infty$ in
${\mathbb R}$ (or ${\mathbb C}$) and
$a\neq 0$, $a_{n}\neq 0$ {\rm [}$n=1,2,\ldots${\rm ]} then
$a_{n}^{-1}\rightarrow a^{-1}$ as $n\rightarrow\infty$.
\end{Theorem}
There are many related definitions.
\begin{Definition} (i) If $f:{\mathbb R}^{l}\rightarrow{\mathbb R}^{k}$
we say that $f({\bf x})\rightarrow {\bf a}$ as
${\bf x}\rightarrow {\bf y}$ if given $\epsilon>0$ we
can find a $\delta(\epsilon)>0$ such that, whenever
$0<||{\bf x-y}||<\delta(\epsilon)$ it follows that
$||f({\bf x})-{\bf a}||<\epsilon$.
(ii) Let $a_{n}\in{\mathbb R}$. We say that $a_{n}\rightarrow\infty$
if, given any $K$ we can find $N(K)$ such that
$a_{n}>K$ for all $n>N(K)$.
\end{Definition}
\begin{Axiom}[Fundamental Axiom of Analysis]
\label{Ax} If $a_{1}$,
$a_{2}$,\ldots is an increasing sequence in ${\mathbb R}$
and there exists an $A\in{\mathbb R}$ such that $a_{n}\leq A$
for all $n$,
then there exists an $a\in {\mathbb R}$ such that
$a_{n}\rightarrow a$ as $n\rightarrow\infty$.
\end{Axiom}
\begin{Definition} We say that the series ${\bf a}_{n}$ in
${\mathbb R}^{k}$ converges to the sum ${\bf a}$ if
\[\sum_{n=1}^{N}{\bf a}_{n}\rightarrow{\bf a}\ \
\mbox{as $N\rightarrow\infty.$}\]
We write
\[\sum_{n=1}^{\infty}{\bf a}_{n}={\bf a}.\]
\end{Definition}
\begin{Lemma}[Absolute Convergence implies Convergence]
If ${\bf a}_{n}\in{\mathbb R}^{k}$ and
$\sum_{n=1}^{\infty}||{\bf a}_{n}||$ converges then
$\sum_{n=1}^{\infty}{\bf a}_{n}$ converges.
\end{Lemma}
\begin{Lemma}[Comparison Test] If $a_{n},b_{n}\in{\mathbb R}$
and $0\leq a_{n}\leq b_{n}$ then whenever
$\sum_{n=1}^{\infty}b_{n}$ converges $\sum_{n=1}^{\infty}a_{n}$
must converge.
\end{Lemma}
\begin{Corollary}[Ratio Test] If $a_{n}\in{\mathbb R}$, $a_{n}\neq 0$
and $|a_{n+1}/a_{n}|\rightarrow l$ as $n\rightarrow\infty$, then, if
$0\leq l<1$, $\sum_{n=1}^{\infty}a_{n}$ converges, whilst if
$l>1$ then $\sum_{n=1}^{\infty}a_{n}$ diverges.
\end{Corollary}
\begin{Example} If $a_{2n}=2^{-2n}$, $a_{2n+1}=2^{-2n-2}$
then the ratio test fails but comparison with $2^{-n}$
shows that the series $a_{n}$ is convergent.
\end{Example}
\begin{Lemma}[Integral Test] Suppose that
$f:[0,\infty)\rightarrow[0,\infty)$ is a decreasing continuous
function. Then if one of
\[\int_{0}^{N}f(x)dx\ \ \mbox{and}\ \ \sum_{n=0}^{N}f(n)\]
tends to a (finite) limit as $N\rightarrow\infty$ so does the other.
\end{Lemma}
\begin{Corollary} $\sum_{n=1}^{\infty}n^{-p}$
converges if and only if $p>1$.
\end{Corollary}
Note the failure of the ratio test for the series $n^{-p}$.
\begin{Example} If $f(x)=1-\cos(2\pi x)$ then
$\int_{0}^{N}f(x)dx$ diverges and $\sum_{n=0}^{N}f(n)$ converges
as $N\rightarrow\infty$.
\end{Example}
\begin{Lemma}[Alternating Series Test] (Not in syllabus.)
Suppose that $a_{n}$ is a decreasing sequence of
positive terms with $a_{n}\rightarrow 0$ as $n\rightarrow\infty$.
Then $\sum_{n=1}^{\infty}(-1)^{n}a_{n}$ converges.
\end{Lemma}
\begin{Example} $\sum_{n=1}^{\infty}(-1)^{n}n^{-p}$ converges
for $p>0$ but only converges absolutely for $p>1$.
\end{Example}
\begin{Theorem}[Rearrangement of Positive Series]
Let $\sigma:{\mathbb N}\rightarrow{\mathbb N}$
be a bijection. If $a_{n}\geq 0$ then, if the series $a_{n}$
converges, so does the rearranged series $a_{\sigma(n)}$
and
\[\sum_{n=1}^{\infty}a_{n}=\sum_{n=1}^{\infty}a_{\sigma(n)}.\]
\end{Theorem}
\begin{Corollary}[Rearrangement of Absolutely Convergent Series]
(Not in syllabus.) Let $\sigma:{\mathbb N}\rightarrow{\mathbb N}$
be a bijection. If ${\bf a}_{n}\in{\mathbb R}^{k}$ then,
if the series ${\bf a}_{n}$ is absolutely convergent,
so is the rearranged series ${\bf a}_{\sigma(n)}$ and
\[\sum_{n=1}^{\infty}{\bf a}_{n}=
\sum_{n=1}^{\infty}{\bf a}_{\sigma(n)}.\]
\end{Corollary}
\begin{Example} Let $a_{2n-1}=n^{-1}$, $a_{2n}=-n^{-1}$
{\rm [$n=1,2,\ldots$]}. Then (by comparison with appropriate
integrals)
\[\sum_{n=1}^{kN}a_{2n-1}+\sum_{n=1}^{lN}a_{2n}
=\sum_{n=lN+1}^{kN}n^{-1}\rightarrow \log(k/l)\]
as $N\rightarrow\infty$ whenever $k$ and $l$ are integers with
$k\geq l>0$.
\end{Example}
\begin{Theorem} Let $a_{n}\in{\mathbb C}$.
(i) If $\sum_{n=0}^{\infty}a_{n}z_{0}^{n}$ converges then
$\sum_{n=0}^{\infty}a_{n}z^{n}$ converges absolutely for
all $z\in{\mathbb C}$ with $|z|<|z_{0}|$.
(ii) If $\sum_{n=0}^{\infty}a_{n}z_{0}^{n}$ diverges then
the sequence $|a_{n}z^{n}|$ is unbounded for
all $z\in{\mathbb C}$ with $|z|>|z_{0}|$.
\end{Theorem}
\begin{Theorem}[Radius of Convergence] (Proof next year.)
Let $a_{n}\in{\mathbb C}$. Then, either
$\sum_{n=0}^{\infty}a_{n}z^{n}$ converges for all $z\in{\mathbb C}$
(and we say that the power series has radius of convergence
infinity) or there exists an $R$ with $R\geq 0$ such that
(i) $\sum_{n=0}^{\infty}a_{n}z^{n}$ converges for all $|z|<R$, and
(ii) $\sum_{n=0}^{\infty}a_{n}z^{n}$ diverges for all $|z|>R$.
\noindent We call $R$ the radius of convergence.
\end{Theorem}
\begin{Example} (i) If $a_{n}=1/n!$ then $R=\infty$.
(ii) If $a_{n}=n!$ then $R=0$.
(iii) If $a_{n}=r^{-n}$ then $R=r$.
(iv) If $a_{4n}=0$, $a_{4n+1}=1$, $a_{4n+2}=2$, $a_{4n+3}=1$
then $R=1$.
(v) If $a_{0}=1$, $a_{n}=n^{p}$ then $R=1$. If $p<-1$ then
we have convergence of $\sum_{n=0}^{\infty}a_{n}z^{n}$
for $|z|=1$, if $p\geq 0$ we have divergence. If $-1\leq p<0$ we
have convergence when $z=-1$ and divergence when $z=1$.
\end{Example}
\begin{Definition} We say that a function
$f:{\mathbb R}^{n}\rightarrow{\mathbb R}^{m}$ is continuous at
${\bf x}\in{\mathbb R}^{n}$ if given $\epsilon>0$ we can find
a $\delta(\epsilon,{\bf x})>0$ such that, whenever
$||{\bf h}||<\delta(\epsilon,{\bf x})$, we have
$||f({\bf x+h})-f({\bf x})||<\epsilon$.
\end{Definition}
\begin{Theorem} (i) If $f:{\mathbb R}^{n}\rightarrow{\mathbb R}^{m}$
is continuous at ${\bf x}\in{\mathbb R}^{n}$ and
$g:{\mathbb R}^{m}\rightarrow{\mathbb R}^{p}$
is continuous at $f({\bf x})$ then the composed function
$g\circ f$ (defined by $g\circ f({\bf y})=g(f({\bf y}))$)
is continuous at ${\bf x}$.
(ii) If $F,G:{\mathbb R}^{n}\rightarrow{\mathbb R}^{m}$
are continuous at ${\bf x}\in{\mathbb R}^{n}$ then so is $F+G$.
\end{Theorem}
\begin{Definition} We say that a function
$f:{\mathbb R}^{n}\rightarrow{\mathbb R}^{m}$ is differentiable at
${\bf x}\in{\mathbb R}^{n}$ with derivative the linear map
$\alpha:{\mathbb R}^{n}\rightarrow{\mathbb R}^{m}$ if
given $\epsilon>0$ we can find
a $\delta(\epsilon,{\bf x})>0$ such that, whenever
$||{\bf h}||<\delta(\epsilon,{\bf x})$, we have
\[||f({\bf x+h})-f({\bf x})-\alpha ({\bf h})||<\epsilon||{\bf h}||.\]
\end{Definition}
We shall write $\alpha=(Df)({\bf x})$. If we
consider the special case $m=n=1$ then $(Df)(x)$ is
a linear map ${\mathbb R}\rightarrow{\mathbb R}$ and so there
exists a $\lambda\in{\mathbb R}$ such that $(Df)(x)h=\lambda h$
for all $h\in{\mathbb R}$. We thus have
\[|f(x+h)-f(x)-\lambda h|<\epsilon|h|\]
for $|h|<\delta(\epsilon,x)$, and so
\[\frac{f(x+h)-f(x)}{h}\rightarrow\lambda.\]
If we set $\lambda=f'(x)$ we recover our old definition
of differentiation. (Sometimes people write $f'({\bf x})=(Df)({\bf x})$
in the general case, but we must then remember that $f'({\bf x})$
is a linear map.)
\begin{Lemma} Suppose that
$f:{\mathbb R}^{n}\rightarrow{\mathbb R}^{m}$ is differentiable at
${\bf x}\in{\mathbb R}^{n}$ with derivative $Df({\bf x})$.
If ${\bf u}$ is a vector in ${\mathbb R}^{n}$ and
${\bf v}$ is a vector in ${\mathbb R}^{m}$ then
\[g_{{\bf u}{\bf v}}(t)={\bf v}.f({\bf x}+t{\bf u})\]
defines a function
$g_{{\bf u}{\bf v}}:{\mathbb R}\rightarrow{\mathbb R}$
which is differentiable at {\bf 0} with derivative
${\bf v}.(Df({\bf x}))({\bf u})$.
\end{Lemma}
\begin{Lemma}
\label{MatNot}
Suppose that
${\bf u}_{1}$, ${\bf u}_{2}$,\ldots, ${\bf u}_{n}$
is a basis for ${\mathbb R}^{n}$ and
${\bf v}_{1}$, ${\bf v}_{2}$,\ldots, ${\bf v}_{m}$
is a basis for ${\mathbb R}^{m}$.
If we write $f_{i,j}({\bf x})={\bf v_{i}}.(Df({\bf x}))({\bf u}_{j})$ then
the linear map $Df({\bf x}):{\mathbb R}^{n}\rightarrow{\mathbb R}^{m}$
has matrix $(f_{i,j}({\bf x}))$ with respect to the given bases.
\end{Lemma}
There is an older and, in many circumstances, more convenient
notation called the partial derivative. Suppose $g$ is a
well behaved real function of $n$ real variables
$x_{1}$, $x_{2}$,\ldots, $x_{n}$. Then we write
\[\frac{\partial g}{\partial x_{i}}=
\lim_{h\rightarrow 0}
\frac{g(x_{1},x_{2},\ldots,x_{i}+h,\ldots,x_{n})-
g(x_{1},x_{2},\ldots,x_{i},\ldots,x_{n})}{h}\]
for the derivative of $g$ with respect to $x_{i}$ when
$x_{1},x_{2},\ldots,x_{i-1},x_{i+1},\ldots,x_{n}$
are kept fixed. With this notation, if we write
$f_{j}$ for the $j$th component of the function $f$ of the
preceding lemma, we have
\[f_{j,i}({\bf x})=\frac{\partial f_{j}}{\partial x_{i}}.\]
However when reading material which uses partial derivatives
(as in Classical Thermodynamics) it is important to
be aware of possible ambiguities.
\begin{Example} Let $f(x,y)=x+y$. If we keep $y$ fixed and
allow $x$ to vary then
${\displaystyle \frac{\partial f}{\partial x}=1}$, but if
we keep $x+y$ fixed and allow $x$ to vary then
${\displaystyle \frac{\partial f}{\partial x}=0}$.
\end{Example}
\begin{Lemma} If $\alpha:{\mathbb R}^{n}\rightarrow{\mathbb R}^{m}$
is linear there exists a constant $A$ such that
\[||\alpha({\bf x})||\leq A||{\bf x}||\]
for all ${\bf x}\in{\mathbb R}^{n}$.
\end{Lemma}
\begin{Theorem}[Chain Rule]\label{Chain}
If $f:{\mathbb R}^{n}\rightarrow{\mathbb R}^{m}$
is differentiable at ${\bf x}\in{\mathbb R}^{n}$ and
$g:{\mathbb R}^{m}\rightarrow{\mathbb R}^{p}$
is differentiable at $f({\bf x})$ then the composed function
$g\circ f$ (defined by $g\circ f({\bf y})=g(f({\bf y}))$)
is differentiable at ${\bf x}$ with
\[D(g\circ f)({\bf x})=D(g)(f({\bf x}))D(f)({\bf x}).\]
\end{Theorem}
\begin{Corollary} (i) If $f,g:{\mathbb R}^{n}\rightarrow{\mathbb R}^{m}$
are differentiable at ${\bf x}\in{\mathbb R}^{n}$ then so is
$f+g$ and
\[D(f+g)({\bf x})=D(f)({\bf x})+D(g)({\bf x}).\]
(ii) If $f,g:{\mathbb R}\rightarrow{\mathbb R}$ are differentiable
at $x$ then so is their product and
\[\frac{d(f(x)g(x))}{dx}=g(x)\frac{d(f(x))}{dx}
+f(x)\frac{d(g(x))}{dx}.\]
\end{Corollary}
In the matrix notation of Lemma \ref{MatNot} the chain rule
of Theorem \ref{Chain} becomes
\[(g\circ f)_{i,k}({\bf x})=\sum_{j=1}^{m}
g_{i,j}(f({\bf x}))f_{j,k}({\bf x}).\]
Still more briefly we may use the summation convention with
$i$ ranging from 1 to $p$, $j$ from 1 to $m$ and $k$ from
1 to $n$ to write
\[(g\circ f)_{i,k}({\bf x})=
g_{i,j}(f({\bf x}))f_{j,k}({\bf x}).\]
If $g$ is a real function of the real variables
$y_{1}$, $y_{2}$,\ldots,$y_{m}$
which are themselves functions of the real variables
$x_{1}$, $x_{2}$,\ldots,$x_{n}$
we recover the traditional form
\[\frac{\partial g}{\partial x_{k}}=
\frac{\partial g}{\partial y_{1}}\frac{\partial y_{1}}{\partial x_{k}}+
\frac{\partial g}{\partial y_{2}}\frac{\partial y_{2}}{\partial x_{k}}+
\ldots +
\frac{\partial g}{\partial y_{m}}\frac{\partial y_{m}}{\partial x_{k}}
.\]
If $f:{\mathbb C}\rightarrow{\mathbb C}$ then, just as in the real case
we may define the derivative $f'(z)$ by
\[f'(z)=\lim_{h\rightarrow 0}
\frac{f(z+h)-f(z)}{h}\]
where it exists (a ${\mathbb C}\rightarrow{\mathbb C}$ differentiable
function is also called analytic). Since ${\mathbb C}$ and ${\mathbb R}$
are so similar those parts of the real theory which ought
to go over to the complex case do.
\begin{Example} If $P$ and $Q$ are polynomials and $Q$ has
roots at $\omega_{1},\ \omega_{2},\ldots,\ \omega_{n}$ then
$P/Q$ is differentiable at all
$z\in{\mathbb C}\setminus\{\omega_{1},\ \omega_{2},\ldots,\ \omega_{n}\}$.
\end{Example}
As the next theorem shows analytic functions are rather special.
\begin{Theorem}[Characterisation of Analytic Functions]
\label{CharAnFn}
Let $f:{\mathbb C}\rightarrow{\mathbb C}$ be given and write
\[f(x+iy)=u(x,y)+iv(x,y)\]
with $x$, $y$, $u$, $v$ real. Let $z_{0}=x_{0}+iy_{0}$.
The following statements are equivalent.
(i) $f$ is differentiable as a function ${\mathbb C}\rightarrow{\mathbb C}$
at $z_{0}$.
(ii) $f(z_{0}+h)=f(z_{0})+\lambda h+\epsilon(h)|h|$ with
$\lambda\in{\mathbb C}$ and $|\epsilon(h)|\rightarrow 0$ as
$|h|\rightarrow 0$. (We have $f'(z_{0})=\lambda$.)
(iii) Near $z_{0}$, $f$ is the composition of translations,
rotations, and (possibly zero) dilatations with an error which decreases
faster than linear.
(iv) The function $(u,v):{\mathbb R}^{2}\rightarrow{\mathbb R}^{2}$
is differentiable at $(x_{0},y_{0})$ with derivative
\[D(u,v)(x_{0},y_{0})=l\alpha\]
where $l\geq 0$ and $\alpha\in SO(2)$ the special orthogonal group.
(v) (Cauchy Riemann Equations)
The function $(u,v):{\mathbb R}^{2}\rightarrow{\mathbb R}^{2}$
is differentiable at $(x_{0},y_{0})$ and (at that point)
\[\frac{\partial u}{\partial x}=\frac{\partial v}{\partial y},\ \
\frac{\partial v}{\partial x}=-\frac{\partial u}{\partial y}.\]
(vi) The function $(u,v):{\mathbb R}^{2}\rightarrow{\mathbb R}^{2}$
is differentiable at $(x_{0},y_{0})$ and (at that point)
\[\frac{\partial u}{\partial X}=\frac{\partial v}{\partial Y},\ \
\frac{\partial v}{\partial X}=-\frac{\partial u}{\partial Y}.\]
for any orthogonal coordinate system $(X,Y)$.
\end{Theorem}
If $f'(z_{0})\neq 0$ then $(iii)$ tells us that $f$ is locally
conformal (i.e. angle preserving).
\begin{Example} (i) The map $z\mapsto z^{2}$ is analytic but is not
conformal at 0.
(ii) The mapping $z\mapsto z^{*}$ is a reflection and nowhere
analytic. It preserves the magnitude of angles but changes
their sense.
\end{Example}
\begin{Example} {\bf (This non-examinable example shows the need
for a rigorous treatment of the calculus)} Define
$f:{\mathbb Q}\rightarrow{\mathbb Q}$ by
\begin{eqnarray*}
f(q)&=&q\ \ \mbox{if $q<0$ or $q^{2}<2$,}\\
f(q)&=&q-3\ \ \mbox{otherwise.}
\end{eqnarray*}
Then, considered as a function ${\mathbb Q}\rightarrow{\mathbb Q}$,
$f$ is continuous, yet $f(1)=1$, $f(2)=-1$ and $f(t)\neq 0$
for all $t\in{\mathbb Q}$ with $1\leq t\leq 2$. Further,
considered as a function ${\mathbb Q}\rightarrow{\mathbb Q}$,
$f$ is differentiable with derivative 1 everywhere, but $f(1)>f(2)$
so $f$ is not everywhere increasing.
\end{Example}
In spite of this, we shall be able next year
to use the Fundamental Axiom (Axiom \ref{Ax}) to prove
rigorously the following two fundamental theorems.
\begin{Theorem}[Intermediate Value Theorem] (Proof next
year.) If $f:[a,b]\rightarrow{\mathbb R}$ is continuous
and $f(a)\leq d\leq f(b)$ then there exists a $c$ with
$a\leq c\leq b$ and $f(c)=d$.
\end{Theorem}
\begin{Theorem} (Proof next year.)\label{contmax}
If $f:[a,b]\rightarrow{\mathbb R}$ is continuous
then $f$ is bounded and attains its bounds. In other words
we can find $c_{1}$ and $c_{2}$ with $a\leq c_{1},c_{2}\leq b$
and $f(c_{1})\leq f(t)\leq f(c_{2})$ for all $t\in [a,b]$.
\end{Theorem}
We use Theorem \ref{contmax} to give rigorous proofs of the
results that follow.
\begin{Theorem}[Rolle's Theorem]
If $f:[a,b]\rightarrow{\mathbb R}$ is continuous, $f$ is differentiable
on $(a,b)$ and $f(a)=f(b)$ then there exists a $c$ with
$a<c<b$ such that $f'(c)=0$.
\end{Theorem}
\begin{Corollary}[Mean Value Theorem] Suppose $b>a$ and
$f:[a,b]\rightarrow{\mathbb R}$
is continuous and $f$ is differentiable on $(a,b)$.
Then there exists a $c$ with $a<c<b$ such that
\[f(b)-f(a)=(b-a)f'(c).\]
In particular:
(i) If $|f'(t)|\leq M$ for all $a<t<b$ then $|f(b)-f(a)|\leq M(b-a)$.
(ii) If $f'(t)>0$ for all $a<t<b$ then $f(b)>f(a)$.
\end{Corollary}
\begin{Corollary} If $f$ and $g$ are differentiable functions
$(a,b)\rightarrow{\mathbb R}$ with $f'(t)=g'(t)$ for all $a<t<b$
then there exists a constant $c$ such that $f(t)=g(t)+c$ for
all $t\in(a,b)$.
\end{Corollary}
\begin{Theorem} If we define $\log:(0,\infty)\rightarrow{\mathbb R}$ by
\[\log(x)=\int_{1}^{x}\frac{dt}{t},\]
then
(i) $\log$ is differentiable with $\log'(x)=1/x$ for all $x>0$.
(ii) $\log(xy)=\log(x)+\log(y)$ for all $x,y>0$.
(iii) $\log(x)$ increases strictly from $-\infty$ to $\infty$
as $x$ increases from $0$ to $\infty$.
In particular, $\log$ has a well defined differentiable inverse
function $\exp:{\mathbb R}\rightarrow(0,\infty)$ satisfying
$\exp'(x)=\exp(x)$.
\end{Theorem}
\begin{Corollary} The additive group $({\mathbb R},+)$ and the
multiplicative group $((0,\infty),\times)$ are isomorphic.
\end{Corollary}
\begin{Theorem} If we set $x^{\alpha}=\exp(\alpha\log(x))$ for
$x>0$, $\alpha\in{\mathbb R}$ then
(i) $x^{\alpha}y^{\alpha}=(xy)^{\alpha}$,
(ii) $x^{\alpha}x^{\beta}=x^{(\alpha+\beta)}$,
(iii) $(x^{\alpha})^{\beta}=x^{\alpha\beta}$,
(iv) $x^{1}=x$,
\noindent
for all $x,y>0$, $\alpha,\beta\in{\mathbb R}$. Further, if
$\alpha\in{\mathbb R}$ is fixed, $x^{\alpha}$ is differentiable
with
\[\frac{dx^{\alpha}}{dx}=\alpha x^{\alpha-1}.\]
\end{Theorem}
\begin{Theorem} If we write
\begin{eqnarray*}
\sin(x)&=&\sum_{n=0}^{\infty}\frac{(-1)^{n}x^{2n+1}}{(2n+1)!},\\
\cos(x)&=&\sum_{n=0}^{\infty}\frac{(-1)^{n}x^{2n}}{(2n)!},
\end{eqnarray*}
then $\sin$ and $\cos$ are well defined differentiable
functions ${\mathbb R}\rightarrow{\mathbb R}$ such that
(i) $\sin'(x)=\cos(x)$ and $\cos'(x)=-\sin(x)$,
(ii) $\sin(x+y)=\sin(x)\cos(y)+\cos(x)\sin(y)$,
$\cos(x+y)=\cos(x)\cos(y)-\sin(x)\sin(y)$ and
$(\cos(x))^{2}+(\sin(x))^{2}=1$,
for all $x,y\in{\mathbb R}$.
Moreover there exists a real number
$\pi$ such that $2<\pi<4$ and (for $x\in{\mathbb R}$)
(iii) $\cos(x)\geq 0$ for $0\leq x<\pi/2$, $\cos(\pi/2)=0$,
(iv) $\sin(x)=\cos(x-\pi/2)$, $\cos(x+\pi)=-\cos(x)$,
(v) If $u,v\in{\mathbb R}$ and $u^{2}+v^{2}=1$ then the equation
$(u,v)=(\cos(\theta),\sin(\theta))$ has exactly one solution
$\theta$ with $0\leq\theta<2\pi$.
\end{Theorem}
\begin{Corollary} If $(x,y)\in{\mathbb R}^{2}$ and $(x,y)\neq(0,0)$
then there is a unique $r>0$ and a unique $\theta$ with
$0\leq\theta<2\pi$ with $x=r\cos(\theta)$, $y=r\sin(\theta)$.
\end{Corollary}
\begin{Theorem} We can extend the function $\exp$ in a consistent
manner to a function ${\mathbb C}\rightarrow{\mathbb C}$ by setting
\[\exp(z)=\sum_{n=0}^{\infty}\frac{z^{n}}{n!}.\]
With this definition,
(i) $\exp$ is differentiable with $\exp'(z)=\exp(z)$ for all
$z\in{\mathbb C}$.
(ii) $\exp(z_{1}+z_{2})=\exp(z_{1})\exp(z_{2})$ for all
$z_{1},z_{2}\in{\mathbb C}$.
(iii) If $x$ and $y$ are real, then
\[\exp(x+iy)=\exp(x)(\cos(y)+i\sin(y)).\]
\end{Theorem}
\begin{Theorem} We can extend the functions $\cos$ and $\sin$
in a consistent
manner to functions ${\mathbb C}\rightarrow{\mathbb C}$ by setting
\begin{eqnarray*}
\sin(z)&=&\frac{\exp(iz)-\exp(-iz)}{2i},\\
\cos(z)&=&\frac{\exp(iz)+\exp(-iz)}{2}.
\end{eqnarray*}
With these definitions,
(i) $\cos$ and $\sin$ are differentiable with
$\sin'(z)=\cos(z)$ and $\cos'(z)=-\sin(z)$,
(ii) $\sin(z+w)=\sin(z)\cos(w)+\cos(z)\sin(w)$,
$\cos(z+w)=\cos(z)\cos(w)-\sin(z)\sin(w)$ and
$(\cos(z))^{2}+(\sin(z))^{2}=1$,
for all $z,w\in{\mathbb C}$.
\end{Theorem}
\begin{Lemma} If $x$ is real, $\cosh(x)=\cos(ix)$ and
$\sinh(x)=-i\sin(ix)$.
\end{Lemma}
\begin{Theorem} (i) If $w\in{\mathbb C}$ the equation $z^{2}=w$
has a solution. However there is no continuous function
$f:{\mathbb C}\rightarrow{\mathbb C}$ such that $f(w)^{2}=w$
for all $w\in{\mathbb C}$.
(ii) The equation $\exp(z)=0$ has no solution. If $w\neq 0$
then the set of solutions for $\exp(z)=w$ has the form
\[\{\log(|w|)+i(\theta+2n\pi):\ n\in{\mathbb Z}\}\]
for some real $\theta$. However there is no continuous function
$g:{\mathbb C}\setminus\{0\}\rightarrow{\mathbb C}$
such that $\exp(g(w))=w$ for all $w\in{\mathbb C}\setminus\{0\}$.
\end{Theorem}
\begin{Definition} A continuous map
${\bf r}:[a,b]\rightarrow{\mathbb R}^{3}$ is called a curve
in ${\mathbb R}^{3}$. If ${\bf r}(a)={\bf r}(b)$ we say
that the curve is closed. We agree to identify (i.e. consider
as the same) the two curves
${\bf r}_{1}:[a_{1},b_{1}]\rightarrow{\mathbb R}^{3}$ and
${\bf r}_{2}:[a_{2},b_{2}]\rightarrow{\mathbb R}^{3}$ if there
exists a continuous bijective function
$\gamma:[a_{1},b_{1}]\rightarrow[a_{2},b_{2}]$ with
$\gamma(a_{1})=a_{2}$ and $\gamma(b_{1})=b_{2}$
such that ${\bf r}_{1}(t)={\bf r}_{2}(\gamma(t))$.
\end{Definition}
\begin{Example} If the curves
${\bf r}_{j}:[0,1]\rightarrow{\mathbb R}^{3}$
are defined by
\begin{eqnarray*}
{\bf r}_{1}(t)&=&(\cos(2\pi t), \sin(2\pi t),0),\\
{\bf r}_{2}(t)&=&(\cos(2\pi t^{2}), \sin(2\pi t^{2}),0),\\
{\bf r}_{3}(t)&=&(\cos(4\pi t), \sin(4\pi t),0),
\end{eqnarray*}
then ${\bf r}_{1}$ and ${\bf r}_{2}$ represent the same
curve but ${\bf r}_{3}$
represents a different curve.
\end{Example}
We say that ${\bf r}_{1}$ and ${\bf r}_{2}$ in the preceding
definition are two parameterisations of the same curve.
\begin{Definition}
If ${\bf r}:[0,l]\rightarrow{\mathbb R}^{3}$ is continuous
and
\[\left|\left|\frac{{\bf r}(s+\delta s)-{\bf r}(s)}{\delta s}
\right|\right|\rightarrow 1\]
as $\delta s\rightarrow 0$ we say that ${\bf r}$ is an arc length
parameterisation of the curve.
\end{Definition}
Just as in the case of functions
$f:{\mathbb R}\rightarrow{\mathbb R}$ where it is natural to identify
the linear map $Df(t):{\mathbb R}\rightarrow{\mathbb R}$ which
takes $h$ to $f'(t)h$ with the real number $f'(t)$ so,
in the case of functions ${\bf r}:[0,l]\rightarrow{\mathbb R}^{3}$,
it is natural to identify the linear map
$D{\bf r}(t):{\mathbb R}\rightarrow{\mathbb R}^{3}$ which
takes $h$ to $(\dot{r_{1}}(t)h,\dot{r_{2}}(t)h,\dot{r_{3}}(t)h)$ with
the vector
$\dot{{\bf r}}(t)=(\dot{r_{1}}(t),\dot{r_{2}}(t),\dot{r_{3}}(t))$.
We observe that (when ${\bf r}$ is well behaved)
\[\left|\left|\frac{{\bf r}(t+\delta t)-{\bf r}(t)}{\delta t}
-\dot{{\bf r}}(t)\right|\right|\rightarrow 0\]
as $\delta t\rightarrow 0$.
\begin{Theorem} If ${\bf r}:[0,l]\rightarrow{\mathbb R}^{3}$ is
an arc length parameterisation of a well behaved curve then
\[\dot{{\bf r}}(s)={\bf t}(s),\]
where ${\bf t}(s)$ is a unit vector defining the direction
of the tangent. Either $\dot{{\bf t}}(s)={\bf 0}$, or
\[\dot{{\bf t}}(s)=\frac{{\bf n}(s)}{\rho(s)},\]
where ${\bf n}(s)$ is a unit vector, perpendicular to
${\bf t}(s)$, defining the direction
of the normal. The scalar $\rho(s)>0$ is called the radius
of curvature (not in the syllabus).
\end{Theorem}
\begin{Theorem}[Taylor's Theorem With Integral Remainder] Suppose that
$f:{\mathbb R}\rightarrow{\mathbb R}$ is $n+1$ times continuously
differentiable. Then
\[f(x)=f(a)+f'(a)(x-a)+\frac{f''(a)(x-a)^{2}}{2!}
+\frac{f'''(a)(x-a)^{3}}{3!}+\ldots+\frac{f^{(n)}(a)(x-a)^{n}}{n!}
+R_{n}(f,x),\]
where
\[R_{n}(f,x)=\frac{1}{n!}\int_{a}^{x}f^{(n+1)}(t)(x-t)^{n}dt.\]
\end{Theorem}
\begin{Corollary}[Local Taylor Theorem]
\label{LocTayThm}
If $f:{\mathbb R}\rightarrow{\mathbb R}$ is $n$
times continuously differentiable then, given any $\epsilon>0$,
there exists a $\delta(\epsilon,a)$ such that
\[\left|f(x)-\left(f(a)+f'(a)(x-a)+\frac{f''(a)(x-a)^{2}}{2!}
\ldots+\frac{f^{(n)}(a)(x-a)^{n}}{n!}
\right)\right|
<\epsilon|x-a|^{n}\]
for all $x$ with $|x-a|<\delta(\epsilon,a)$.
\end{Corollary}
\begin{Theorem}[Binomial Theorem] If $\alpha$ and $x$ are real
and $|x|<1$ then
\[(1-x)^{\alpha}=\sum_{n=0}^{\infty}
\frac{\alpha(\alpha-1)(\alpha-2)\ldots(\alpha-(n-1))}{n!}x^{n}.\]
If $\alpha$ is a positive integer there are only a finite
number of non-zero terms; otherwise the sum diverges if $|x|>1$.
\end{Theorem}
\begin{Example}[Cauchy] Let
\begin{eqnarray*}
E(x)&=&\exp(-1/x^{2})\ \ \mbox{for $x\neq 0$,}\\
E(0)&=&0.
\end{eqnarray*}
Then $E$ is infinitely differentiable with
\begin{eqnarray*}
E^{(n)}(x)&=&Q_{n}(1/x)\exp(-1/x^{2})\ \ \mbox{for $x\neq 0$,}\\
E^{(n)}(0)&=&0,
\end{eqnarray*}
where $Q_{n}$ is a polynomial.
The Taylor expansion
\[E(x)=\sum_{n=0}^{\infty}\frac{E^{(n)}(0)}{n!}x^{n}\]
is only valid at the single point $x=0$.
\end{Example}
Next year we shall see that analytic functions (i.e. differentiable
functions $f:{\mathbb C}\rightarrow{\mathbb C}$) form such a restricted
class that a Taylor expansion is always possible. (We shall make
no essential use of this result so it serves mainly as a trailer for
sensational results to come.)
\begin{Theorem}[Taylor's Theorem For Analytic Functions]
\label{TayAn}
If $f:\{z\in{\mathbb C}:\ |z|<R\}\rightarrow{\mathbb C}$ is
differentiable (i.e.\ analytic) then
\[f(z)=\sum_{n=0}^{\infty}\frac{f^{(n)}(0)}{n!}z^{n}\]
for all $|z|<R$.
\end{Theorem}
Similar ideas apply to well behaved functions of several real
variables.
\begin{Theorem}[Local Multidimensional Taylor Theorem]
\label{LMTT}
If $f:{\mathbb R}^{n}\rightarrow{\mathbb R}^{m}$ is sufficiently
well behaved then, given any $\epsilon>0$,
there exists a $\delta(\epsilon,{\bf a})$ such that
\[\left|\left|f_{i}({\bf x})-\left(f_{i}({\bf a})+
\sum_{1\leq j\leq n}f_{i,j}({\bf a})(x_{j}-a_{j})
+\sum_{1\leq j,k\leq n}\frac{f_{i,jk}({\bf a})(x_{j}-a_{j})(x_{k}-a_{k})}
{2!}+\ldots\right.\right.\right.\]
\[\ \ \ \ \ \ \ \left.\left.\left.+\sum_{1\leq j,k,\ldots,p\leq n}
\frac{f_{i,jk\ldots p}({\bf a})(x_{j}-a_{j})(x_{k}-a_{k})\ldots
(x_{p}-a_{p})}{N!}
\right)\right|\right|
<\epsilon||{\bf x-a}||^{N}\]
for all ${\bf x}$ with $||{\bf x}-{\bf a}||<\delta(\epsilon,{\bf a})$.
\end{Theorem}
By a careful exploitation of these ideas (which we leave to next
year) it is possible to prove the following useful result.
\begin{Theorem}[Partial Differentiation Commutes]
\label{DivCom}
Let $f$ be a function ${\mathbb R}\rightarrow{\mathbb R}^{m}$.
If $f$ is sufficiently well behaved (more exactly if $f$
has continuous second partial derivatives)
then $f_{,jk}=f_{,kj}$.
\end{Theorem}
In other words
\[\frac{\partial^{2}f}{\partial x_{k}\partial x_{j}}=
\frac{\partial^{2}f}{\partial x_{j}\partial x_{k}}.\]
Applied to the function of Theorem \ref{LMTT} the result
shows that $f_{i,jk}=f_{i,kj}$ and that the higher order terms
have similar symmetries. Note that some condition on
the behaviour of $f$ is required in Theorem \ref{DivCom} since
it is possible to construct pathological functions for
which $f_{,jk}$ and $f_{,kj}$ exist but are not equal.
Combining Theorem \ref{TayAn} which says that analytic functions
are well behaved, with Theorem \ref{DivCom} which says that, for
well behaved functions, the partial derivatives commute and
with the Cauchy Riemann equations of Theorem \ref{CharAnFn}
we obtain the following result which turns out
to be very important for later work.
\begin{Theorem}
\label{Harm}
If $f:{\mathbb C}\rightarrow{\mathbb C}$ is a well
behaved analytic function and we write $f(x+iy)=u(x,y)+iv(x,y)$
as in Theorem \ref{CharAnFn} then $u$ and $v$ both satisfy Laplace's
equation
\[\frac{\partial^{2} \phi}{\partial x^{2}}+
\frac{\partial^{2} \phi}{\partial y^{2}}=0.\]
\end{Theorem}
Functions which satisfy Laplace's equation are called harmonic.
In the second half of this course
you will see that the
converse to Theorem \ref{Harm} holds and every harmonic
function is (at least locally) the real part of an analytic
function.
We now return to the local Taylor expansion of Theorem \ref {LMTT}
in the special but important case when $m=1$ concentrating
particularly on the cases when $n\leq 3$ which are
easiest to visualise. If $f:{\mathbb R}^{n}\rightarrow{\mathbb R}$
is well behaved Theorem \ref {LMTT} tells us that
\[f({\bf x})=f({\bf a})+
\sum_{1\leq j\leq n}f_{,j}({\bf a})(x_{j}-a_{j})
+\sum_{1\leq j,k\leq n}\frac{f_{,jk}({\bf a})(x_{j}-a_{j})(x_{k}-a_{k})}
{2!}+error,\]
where the error term decreases faster than $||{\bf x-a}||^{2}$
as ${\bf x}$ approaches ${\bf a}$. Bearing in mind the symmetry
of the second derivative, we may write this in matrix
terms as
\[f({\bf a+h})=f({\bf a})+{\bf b}^{T}{\bf h}
+{\textstyle \frac{1}{2}}{\bf h}^{T}B{\bf h}+error,\ \ \ (*')\]
where ${\bf h}$ is a column vector, ${\bf b}$ is a column vector
with $b_{j}=f_{,j}({\bf a})$ and $B$ is an $n\times n$ symmetric
matrix with $b_{jk}=f_{,jk}({\bf a})$. We call $B$ the
Hessian matrix. We give ${\bf b}$ the name grad$(f)$,
$\nabla f$, or $\nabla f({\bf a})$ so that $(*')$ becomes
\[f({\bf a+h})=f({\bf a})+\nabla f({\bf a})^{T}{\bf h}
+{\textstyle \frac{1}{2}}{\bf h}^{T}B{\bf h}+error.\ \ \ (*'')\]
Alternatively we may write the equation in
terms of linear maps as
\[f({\bf a+h})=f({\bf a})+(Df({\bf a}))({\bf h})
+{\textstyle \frac{1}{2}}{\bf h}.\beta{\bf h}+error,\ \ \ (**)\]
where the dot denotes inner product and $\beta$ is the linear
map with matrix $B$. Combining the ideas of $(*'')$ and $(**)$
gives yet another form of the equation.
\[f({\bf a+h})=f({\bf a})+\nabla f({\bf a}).({\bf h})
+{\textstyle \frac{1}{2}}{\bf h}.\beta{\bf h}+error.\ \ \ (*)\]
To help follow the rest of this discussion the reader
should draw contour lines (that is lines in ${\mathbb R}^{2}$
on which $f$ is constant) for $f$ when $n=2$ and imagine
contour surfaces (that is surfaces in ${\mathbb R}^{3}$
on which $f$ is constant) for $f$ when $n=3$.
Observe that $(*)$ tells us that
\[f({\bf a}+\delta{\bf a})-f({\bf a})=
\nabla f({\bf a}).\delta{\bf a}+error,\]
or, still more briefly
\[\delta f=\nabla f.\delta{\bf a}+error,\]
where the error decreases faster than linearly.
If we move along a line (or surface) on which $f$ is constant
then $\delta f=0$ so $\nabla f$ must be perpendicular to
contour lines (or surfaces). Working in this
context it is clear that $\nabla f$ is a vector in the
direction that $f$ changes most rapidly of length
proportional to the rate of change of $f$ in that
direction\footnote{WARNING The geometric introduction of $\nabla f$
above depends on using a particular inner product. $\nabla f$
behaves as a vector so long as we confine ourselves
to orthogonal changes of coordinates. If we change our
scales of measurement then it behaves in an unexpected
manner. The Pure Mathematician explains this by saying
that $\nabla f$ is indeed a vector but lives in a dual space
and the Applied Mathematician explains this by saying
that $\nabla f$ is indeed a vector but of contravariant
type. Fortunately the problem does not
arise until Part II, and, possibly, not even then, so you
may snopake this footnote out and forget it.}
So long as $\nabla f\neq{\bf 0}$ the linear term
$\nabla f.\delta{\bf h}$ dominates all the other
non-constant terms in $(*)$ (so, in particular,
we cannot have a maximum or a minimum at such
a point). At, so called, stationary points
$\nabla f={\bf 0}$ and $(*)$ becomes
\[f({\bf a+h})-f({\bf a})=
{\textstyle \frac{1}{2}}{\bf h}.\beta{\bf h}+error.\ \ \ (*)\]
If we consider the case when $n=2$ and write $(*)$ in coordinate
form we get
\[f(a_{1}+h_{1},a_{2}+h_{2})-f(a_{1},a_{2})=
{\textstyle \frac{1}{2}}(f_{,11}h_{1}^{2}+
2f_{,12}h_{1}h_{2}+f_{,22}h_{2}^{2})+error,\]
with the error decreasing faster than quadratically.
Using the ideas of the previous course we see that
(i) If $f_{,11}>0$ and $\det B=f_{,11}f_{,22}-f_{,12}^{2}>0$
then the Hessian
\[B=\left(\begin{array}{cc}f_{,11}&f_{,12}\\
f_{,21}&f_{,22}\end{array}\right)\]
is positive definite and ${\bf a}$ is a minimum.
(ii) If $f_{,11}<0$ and $\det B<0$ then the Hessian is
negative definite and ${\bf a}$ is a maximum.
(iii) If $f_{,11}$ and $\det B$ are non-zero and of opposite
signs then ${\bf a}$ is not a maximum nor a minimum.
(We have a saddle point.)
(iv) In all other cases the behaviour depends on higher order
terms. (But see part (ii) of Remark \ref{perturb} below.)
\noindent No new phenomena emerge in higher dimensions ($n\geq 3$)
but the calculations become a bit more complicated.
We make the following remarks which are non-examinable
and will be proved by hand waving.
\begin{Remark}
\label{perturb}
(i) If $f:{\mathbb R}\rightarrow{\mathbb R}$ is a
well behaved function and
$\epsilon>0$ there exists a well behaved perturbation
$\tilde{f}:{\mathbb R}\rightarrow{\mathbb R}$ such that
$|f(t)-\tilde{f}(t)|<\epsilon$ for all $t\in{\mathbb R}$
and all the stationary
points of $\tilde{f}$ are maxima and minima.
(ii) If $g:{\mathbb R}^{2}\rightarrow{\mathbb R}$ is a well behaved
function and $\epsilon>0$ there exists a well behaved perturbation
$\tilde{g}:{\mathbb R}^{2}\rightarrow{\mathbb R}$ such that
$|g({\bf t})-\tilde{g}({\bf t})|<\epsilon$ for all
${\bf t}\in{\mathbb R}^{2}$ and, at each stationary point
the matrix
\[\left(\begin{array}{cc}
\tilde{g}_{,11}&\tilde{g}_{,12}\\
\tilde{g}_{,21}&\tilde{g}_{,22}\\
\end{array}\right)\]
is invertible and $\tilde{g}_{,11}\neq 0$.
\end{Remark}
The following theorem (which is not in the syllabus) indicates
why saddle points cannot be perturbed away.
\begin{Theorem}[Lakes, Peaks and Passes] Consider the sphere
\[S_{2}=\{{\bf x}\in{\mathbb R}^{3}:\ ||{\bf x}||=1\}.\]
If $f:S_{2}\rightarrow{\mathbb R}$ is well behaved
with $L$ minima, $P$ maxima and $S$ saddle points
then $P-S+L=2$.
\end{Theorem}
More generally the number $P-S+L$ is a `topological invariant'
for the associated surface.
\end{document}