\documentclass[12pt,a4paper]{article}
\usepackage{amssymb,amsmath,amsthm,latexsym}
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{example}[theorem]{Example}
\newtheorem{exercise}[theorem]{Exercise}
\newtheorem{convention}[theorem]{Convention}
\newtheorem{statement}[theorem]{Statement}
\newtheorem{fact}[theorem]{Fact}
\newtheorem{axiom}[theorem]{Axiom}
\theoremstyle{definition}
\newtheorem{question}[theorem]{Q}
\newcommand{\im}{\operatorname{im}}
\newcommand{\Char}{\operatorname{char}}
\newcommand{\supp}{\operatorname{supp}}
\newcommand{\singsupp}{\operatorname{singsupp}}
\begin{document}
\title{Partial Differential Equations}
\author{T.~W.~K\"{o}rner after Joshi and Wassermann}
\maketitle
\begin{footnotesize}
\noindent
{\bf Small print}
These notes are a digest of much more complete
notes by M.~S.~Joshi and A.~J.~Wassermann
which are also being issued for this course.
I should {\bf very much} appreciate being told
of any corrections or possible improvements
and might even part with a small reward to the
first finder of particular errors. This document
is written in \LaTeXe{} and stored in the file labelled
\verb+~twk/IIB/PDE.tex+ on emu in (I hope) read permitted form.
My e-mail address is \verb+twk@dpmms+.
\end{footnotesize}
\tableofcontents
\section{Introduction}
\subsection{Generalities} When studying ordinary
(i.e. not partial) differential equations
we start with linear differential equations with
constant coefficients. We then study linear
differential equations and then plunge into
a boundless ocean of non-linear differential equations.
The reader will therefore not be surprised if most of a first
course on the potentially much more complicated
study of partial differential equations should limit
itself essentially to linear partial differential equations.
A linear partial differential equation is an equation
of the form $Pu=f$ where $u$ and $f$ are
suitable functions from ${\mathbb R}^{n}$ to
${\mathbb R}$ and
\[P=\sum_{|\alpha|\leq k}a_{\alpha}(x)\partial^{\alpha}.\]
Our definition introduces some of the condensed notation
conventions which make the subject easier for the expert
and harder for the beginner. The \emph{multi-index}
\[\alpha=(\alpha_{1},\ \alpha_{2},\ \dots,\ \alpha_{n})
\in{\mathbb N}^{n}.\] We write
\[|\alpha|=\sum_{j=1}^{n}\alpha_{j}\]
and
\[\partial^{\alpha}u=\frac{\partial^{|\alpha|}u}
{\partial x_{1}^{\alpha_{1}}\partial x_{2}^{\alpha_{2}}
\dots\partial x_{n}^{\alpha_{n}}}.\]
Sometimes we write $\partial^{\alpha}u=
\tfrac{\partial^{|\alpha|}u}{\partial x^{\alpha}}$.
Although the first part of the course will deal
with `first order' linear partial differential equations
without restrictions on the coefficients (and indeed
even with slightly more general partial differential equations)
the main part of the course will deal with
linear partial differential equations
with constant coefficients. The main tools used
will be Laurent Schwartz's theory of distributions
and the Fourier transform.
The fact that we do not deal with non-linear equations
does not mean that they are not important.
The equations of general relativity are non-linear
and at a more mundane level the Navier-Stokes
equation of fluid dynamics
\[\frac{\partial u}{\partial t}
-\Delta u+u\cdot\nabla u=f-\nabla p, \ \nabla\cdot u=0\]
is non-linear.
We call $P=\sum_{|\alpha|\leq k}a_{\alpha}(x)\partial^{\alpha}$
a differential operator (or just an `operator').
We look at three such operators in detail.
The first which must be
most studied non-trivial differential operator
in mathematics is the
Laplacian $\Delta$ known in more old fashioned texts as
$\nabla^{2}$ and defined by
\[\Delta u=\sum_{j=1}^{n}\frac{\partial^{2}u}{\partial x_{j}^{2}}\]
where $u:{\mathbb R}^{n}\rightarrow{\mathbb R}$ is a suitable
function. The second
is the wave operator $\Box$ given by
\[\Box u(t,{\mathbf x})=\frac{\partial^{2}u}{\partial t^{2}}(t,{\mathbf x})
-\Delta_{\mathbf x} u(t,{\mathbf x})\]
where $t\in{\mathbb R}$, ${\mathbf x}\in{\mathbb R}^{n}$
and $u:{\mathbb R}\times{\mathbb R}^{n}\rightarrow{\mathbb R}$
is a suitable function. The third is the heat
operator $J$ given by
\[Ju(t,{\mathbf x})=\frac{\partial u}{\partial t}(t,{\mathbf x})
-\Delta_{\mathbf x} u(t,{\mathbf x}).\]
The notations $\Delta$ and $\Box$ are standard but
the notation $J$ is not.
\subsection{The symbol}
A key concept in more advanced work is the \emph{total symbol}
$\sigma(P)$
of a linear partial differential operator
\[P(x,\partial)=\sum_{|\alpha|\leq k}a_{\alpha}(x)\partial^{\alpha}\]
obtained by replacing $\frac{\partial\ }{\partial x_{j}}$
by $i\xi_{j}$ so that
\[\sigma(P)=p(x,\xi)=
\sum_{|\alpha|\leq k}a_{\alpha}(x)(i\xi)^{\alpha}
=e^{-ix.\xi}P(e^{ix.\xi}).\]
Note that $x$ and $\xi$ are $n$ dimensional vectors
and that we use the convention
\[y^{\alpha}=\prod_{j=1}^{n}y_{j}^{\alpha_{j}}.\]
To see where the symbol comes from, observe that
taking Fourier transforms
\[\widehat{Pu}(\xi)=p(x,\xi)\hat{u}(\xi)\]
and so taking inverse Fourier transforms
\[P(x,\partial)u=\left(\frac{1}{2\pi}\right)^{n/2}
\int_{{\mathbb R}^{n}}e^{ix.\xi}p(x,\xi)\hat{u}(\xi)\,d\xi.\]
Applying the Leibniz rule we get the following result.
\begin{lemma}[Proposition 1] If $P$ and $Q$ are
linear partial differential operators
\[\sigma(PQ)(x,\xi)
=\sum\frac{(-i)^{|\alpha|}}{\alpha !}
\partial_{\xi}^{\alpha}\sigma(P)(x,\xi)
\partial_{x}^{\alpha}\sigma(Q)(x,\xi).\]
\end{lemma}
Here as one might expect $\alpha !=\prod_{j=1}^{n}\alpha_{j}! $.
We say that the differential operator
\[P(x,\partial)=\sum_{|\alpha|\leq k}a_{\alpha}(x)\partial^{\alpha}\]
has order $k$ and that it has principal symbol
\[\sigma_{k}(P)(x,\xi)=
\sum_{|\alpha|=k}a_{\alpha}(x)(i\xi)^{\alpha}.\]
Notice that if $P$ has degree $k$ and $Q$ degree $l$
then the principal symbol of $PQ$ is given by
the simple formula
\[\sigma_{l+k}(PQ)=\sigma_{k}(P)\sigma_{l}(Q).\]
If the principal symbol is never zero or only
vanishes to first order then the operator is said to
be of \emph{principal type}. In more advanced work
it is shown that when the operator is of principal
type the lower order terms have little effect on
the qualitative behaviour of the associated partial
differential equation.
We define the \emph{characteristic set} to be the
subset of ${\mathbb R}^{n}\times{\mathbb R}^{n}$
where the principal symbol vanishes.
\[\Char(P)=\{(x,\xi)\, :\, \sigma_{k}(P)(x,\xi)=0\}.\]
We say that $P$ is \emph{elliptic} at $x$
if the principal symbol $\sigma_{k}(P)(x,\xi)\neq 0$
for $\xi\neq 0$. If $P$ is \emph{elliptic} at $x$
for all $x$ we say that $P$ is elliptic.
The reason for the use of the word `elliptic'
may be traced to the symbols of the three
special operators.
\begin{alignat*}{2}
\sigma(\Delta)(\xi)&=-|\xi|^{2}&&\qquad\text{(Laplacian)}\\
\sigma(\Box)(\tau,\xi)&=-\tau^{2}+|\xi|^{2}&&\qquad\text{(Wave)}\\
\sigma(J)(\tau,\xi)&=i\tau+|\xi|^{2}&&\qquad\text{(Heat)}
\end{alignat*}
Traditionally second order operators which behaved like
the Laplacian were called elliptic,
those which behaved like the wave operator
were called hyperbolic and those that behaved
like the heat operator were called parabolic.
The distinction is very useful but the reference
to conic sections is not.
\begin{example}[Example 1] (i) We could have considered
complex valued $u$ in place of real valued $u$.
If we do this the operator
\[P=\frac{\partial\ }{\partial x}+i\frac{\partial\ }{\partial y}\]
has principal symbol
\[\sigma_{1}(P)(x,y:\xi,\eta)=i\xi-\eta\]
and so is elliptic.
(ii) The Laplacian $\Delta$ is elliptic but the
wave operator $\Box$ and the heat operator $J$ are not.
\[\Char(\Box)=\{(t,x,\tau,\xi)\in{\mathbb R}\times{\mathbb R}^{n}
\times{\mathbb R}\times{\mathbb R}^{n}
\, :\, \tau^{2}-|\xi|^{2}=0\}\]
\[\Char(J)=\{(t,x,\tau,\xi)\in{\mathbb R}\times{\mathbb R}^{n}
\times{\mathbb R}\times{\mathbb R}^{n}
\, :\, \tau-|\xi|^{2}=0\}\]
\end{example}
\section{Ordinary differential equations}
\subsection{The contraction mapping theorem}\label{Ordinary}
Hadamard
introduced the idea of a well posed problem into the study
of partial differential equations. According to
Hadamard a well posed problem must have a solution
which exists, is unique and varies continuously with
the given data. Without going too deeply into the matter
we may agree that these are reasonable matters to investigate.
Thus given a partial differential equation with
boundary conditions we shall study the following problems.
\emph{Existence} Can we show that there is a solution
in a neighbourhood of a given point? Can we show that
there exists a solution everywhere?
\emph{Uniqueness} Is the solution unique?
\emph{Continuity} Does $u$ depend continuously on
the boundary conditions? Does $u$ depend continuously
on other elements of the problem?
\emph{Smoothness} How many times is $u$ differentiable?
Does $u$ have points of singularity? Does the solution
blow up in some way after a finite time?
We may illustrate these ideas in the case of ordinary
differential equations. Some of the material in this section
will be familiar from examination questions on the first
analysis course in 1B but you should consider it all
with the exception of the contraction mapping theorem
(Theorem~\ref{Contraction 1}) itself to be examinable.
In 1B we proved Banach's contraction mapping theorem.
\begin{theorem}[Theorem 1]\label{Contraction 1}
Let $(X,d)$ be a complete
non-empty metric space and $T:X\rightarrow X$ a map
such that $d(Tx,Ty)\leq k\,d(x,y)$ for all $x,\ y\in X$
and some $k$ with $0\leq k<1$. Then there exists
a unique $x_{0}\in X$ such that $Tx_{0}=x_{0}$.
If $x\in X$ then $T^{n}x\rightarrow x_{0}$
as $n\rightarrow\infty$.
\end{theorem}
This can be strengthened as follows.
\begin{theorem}[Corollary 1] Let $(X,d)$ be a complete
non-empty metric space and $T:X\rightarrow X$ a map.
Suppose further
that there exists an integer $N\geq 1$ and a $k$
with $0\leq k<1$
such that
$d(T^{N}x,T^{N}y)\leq k\,d(x,y)$ for all $x,\ y\in X$.
Then there exists
a unique $x_{0}\in X$ such that $Tx_{0}=x_{0}$.
If $x\in X$ then $T^{n}x\rightarrow x_{0}$
as $n\rightarrow\infty$.
\end{theorem}
Note that these theorems not only give a fixed point
but also give a method for finding it.
For the rest of section~\ref{Ordinary}
$f$ will be a function from
${\mathbb R}\times{\mathbb R}^{n}$ to ${\mathbb R}^{n}$. Let
\[E=\{t\in {\mathbb R}\, :\, |t-t_{0}|\leq a\}
\times\{x\in {\mathbb R}^{n}\, :\, \|x-x_{0}\|\leq b\}.\]
We assume that $f$ satisfies the Lipschitz condition
\[\|f(t,x_{1})-f(t,x_{2})\|\leq c\|x_{1}-x_{2}\|\]
on $E$.
\begin{exercise} (i) Show that a function that satisfies
a Lipschitz condition is continuous.
(ii) Show that any continuously differentiable function satisfies
a Lipschitz condition.
(iii) Show that a function that satisfies
a Lipschitz condition need not be differentiable everywhere.
(iv) Show that the function $f$ considered above is bounded on $E$.
\end{exercise}
We set $M=\sup_{(t,x)\in E}\|f(t,x)\|$ and $h=\min(a,bM^{-1})$.
\begin{theorem}\label{Solution}
If $f$ is as above the differential equation
\begin{equation*}
\frac{dx}{dt}=f(t,x),\ \ x(t_{0})=x_{0} \tag*{$\bigstar$}
\end{equation*}
has a unique solution for $|t-t_{0}|\leq h$.
\end{theorem}
\begin{example}(Here $x:{\mathbb R}\rightarrow{\mathbb R}$.)
(i) The differential equation
\[\frac{dx}{dt}=0,\ x(0)=0,\ x(1)=1\]
has no solution.
(ii) Show that the differential equation
\[\frac{dx}{dt}=x^{\frac{2}{3}},\ x(0)=0\]
has at least two solutions.
\end{example}
From now on we move away from 1B.
\begin{theorem}[Theorem 3]
The solution of $\bigstar$ in Theorem~\ref{Solution}
depends continuously on $x_{0}$. More formally,
if we define $T:{\mathbb R}^{n}\rightarrow C([t_{0}-h,t_{0}+h])$
by taking $Ty$ to be the solution of
\begin{equation*}
\frac{dx}{dt}=f(t,x),\ \ x(t_{0})=y
\end{equation*}
and give $C([t_{0}-h,t_{0}+h])$ the uniform norm then $T$ is
continuous.
\end{theorem}
In the special case of \emph{linear} ordinary differential equations
we can give a rather strong perturbation result.
\begin{theorem}[Theorem 4] We use the standard operator
norm on the space
${\mathcal L}={\mathcal L}({\mathbb R}^{n},{\mathbb R}^{n})$
of linear maps. Suppose that
$A,B:{\mathbb R}\times{\mathbb R}^{n}\rightarrow{\mathcal L}$
are continuous and that $M\geq \|A(t,x)\|,\ \|B(t,x)\|$ for all $t$ and
$x$.
\begin{align*}
\frac{d\xi}{dt}(t,x)&=A(t,x)\xi(t,x),\ \xi(t_{0},x)=a(x)\\
\frac{d\eta}{dt}(t,x)&=B(t,x)\eta(t,x),\ \eta(t_{0},x)=b(x)
\end{align*}
then if $|t-t_{0}|\leq K$
\[\|\xi(t,x)-\eta(t,x)\|\leq C(a,K,M)\|A-B\|(e^{M|t-t_{0}|}-1)
+\|a-b\|e^{M|t-t_{0}|}\]
where $C(a,K,M)$ depends only on $a$, $M$ and $K$.
\end{theorem}
\subsection{Vector fields, integral curves and flows}
Let $U$ be an open subset of ${\mathbb R}^{n}$.
A \emph{time dependent vector field} on $U$ is
a map
\[f:(-\epsilon,\epsilon)\times U\rightarrow{\mathbb R}^{n}\]
which associates a vector $f(t,x)$
to each time $t$ and point $x$.
Let $x_{0}\in U$. An \emph{integral curve} for the vector field
$f$ with starting point $x_{0}$ is a map
\[\phi:(-\delta,\delta)\rightarrow U\]
such that
\[\frac{d\phi}{dt}=f(t,\phi(t))\ \text{and}\ \phi(0)=x_{0}\]
so that the tangent vectors to $\phi$ are just the values
of the vector field at that point at that time.
\begin{lemma} With the notation above, if $f$ has
a continuous derivative then an integral curve
always exists.
\end{lemma}
A \emph{local flow} for $f$ at $x_{0}$ is a map
\[\alpha:(-\delta,\delta)\times U_{0}\rightarrow U\]
where $\epsilon>\delta>0$, $U_{0}$ is open
and $x_{0}\in U_{0}\subseteq U$, such that
\begin{align*}
\frac{d\ }{dt}\alpha(t,x)=f(t,\alpha(t,x)),
\alpha(0,x)=x.
\end{align*}
Thus if $x$ is fixed $\alpha_{x}(t)=\alpha(t,x)$ is
an integral curve with starting point $x$.
In some sense a flow is a collection of integral
curves.
We note but shall not prove that the smoother the vector
field the smoother the associated flow.
\begin{theorem}[Theorem 5] If $f$ is $C^{k}$ and
\begin{align*}
\frac{d\ }{dt}\alpha(t,x)=f(t,\alpha(t,x)),
\alpha(0,x)=x.
\end{align*}
then $\alpha\in C^{k}$ $[1\leq k]$.
\end{theorem}
Now suppose $f$ does not depend on $t$.
Let $\alpha_{x}(t)=\alpha(t,x)$ as before.
\begin{lemma}\label{group}
With the notation just established
there exists an $\epsilon>0$ such that
\[\alpha_{t+s}(u)=\alpha_{t}(\alpha_{s}(u))\]
for all $|t|,|s|,|u|<\epsilon$.
\end{lemma}
It is clear from the proof of Lemma~\ref{group}
that the relation
\[\alpha_{t+s}=\alpha_{t}\circ\alpha_{s}\]
will hold whenever it is reasonable for it to hold
and in particular if the flow is determined for all
time we have a group action. This kind of group action
is called a dynamical system (see Course O6).
\begin{definition} Let $f:U\rightarrow{\mathbb R}^{n}$
be a vector field. We say that $x_{0}\in U$ is a
critical point of $f$ if $f(x_{0})=0$.
\end{definition}
\begin{lemma} Let $f:U\rightarrow{\mathbb R}^{n}$
be a (time independent) vector field.
(i) If $\phi$ is an integral curve passing through
a critical point $x_{0}$
then $\phi$ is constant.
(ii) Suppose $\phi$ is an integral curve defined
for all times $t$ and $\phi(t)\rightarrow x_{0}\in U$
as $t\rightarrow\infty$. Then $x_{0}$ is a critical point.
\end{lemma}
\subsection{First order linear and semi-linear PDEs}
Integral curves give us a method for solving first order
partial differential equations
\[\sum_{j=1}^{n}a_{j}(x)\frac{\partial u}{\partial x_{j}}=f(x).\]
In fact the method works for the slightly
more general \emph{semi-linear}
first order partial differential equation
\begin{equation*}
\sum_{j=1}^{n}a_{j}(x)\frac{\partial u}{\partial x_{j}}=f(x,u).
\tag*{(*)}
\end{equation*}
Here we assume that $a_{j}$ is a real once continuously
differentiable function and that $f$ is a real or complex
continuously
differentiable function.
The idea is to consider the vector field
\[A(x)=(a_{1}(x),\ a_{2}(x),\ \dots, \ a_{n}(x)).\]
If $\gamma$ is an integral curve of $A$ then
\[\frac{d\ }{dt}u(\gamma(t))
=\sum_{j=1}^{n}a_{j}(\gamma(t))
\frac{\partial u}{\partial x_{j}}(\gamma(t)).\]
Thus solving (*) is equivalent to solving the
ordinary differential equation
\begin{equation*}
\frac{d\ }{dt}u(\gamma(t))=f(\gamma(t),u(\gamma(t)))
\tag*{(**)}
\end{equation*}
This not only gives us a method for solving (*) but
tells us the appropriate boundary conditions to use.
For example, specifying $u(0,y)$ along an integral curve
will, in general, give a problem with no solution.
What we need to do is to specify $u(0,y)$ on a
hypersurface $S$ and solve along each integral curve.
Recall that we defined
the \emph{characteristic set} of a linear differential
operator $P(x,\partial)$ to be the
subset of ${\mathbb R}^{n}\times{\mathbb R}^{n}$
where the principal symbol vanishes.
\[\Char(P)=\{(x,\xi)\, :\, \sigma_{k}(P)(x,\xi)=0\}.\]
In the case of the linear differential operator
\[L(x,\partial)=\sum_{j=1}^{n}a_{j}(x)\frac{\partial\ }{\partial x_{j}}\]
we have
\[\Char(L)=\left\{(x,\xi)\, :\, \sum_{j=1}^{n}a_{j}(x)\xi_{j}=0\right\}.\]
The hypersurface $S$ is said to be \emph{characteristic}
for $P$ at $x$ if the normal vector is a characteristic vector
for $P$. The hypersurface $S$ is called
\emph{non-characteristic}
if it is not characteristic at any point.
In the case of the linear differential operator $L$
which we are considering, $S$ is non-characteristic
if at each $x\in S$ the normal $\zeta$ to $S$ satisfies
\[\sum_{j=1}^{n}a_{j}\zeta_{j}\neq 0.\]
Retracing our steps it is clear that we have
essentially (there is a slight subtlety which we
shall deal with when we prove Theorem~\ref{quasi-linear})
proved the
following theorem.
(Note the use of the word `locally'.)
\begin{theorem}[Theorem 6] Locally, there is a unique
solution of $(*)$ with $u(x,0)$ given on a non-characteristic
hypersurface $S$.
\end{theorem}
Working through a few examples will make the theory
much clearer.
\begin{example}[Example 2] Solve the partial
differential equation
\[\frac{\partial u}{\partial x}+
2x\frac{\partial u}{\partial y}=u^{2}\]
subject to $u(0,y)=f(y)$.
\end{example}
\subsection{First order quasi-linear PDEs} An extension
of the previous technique enables us to solve the
slightly more general first order \emph{quasi-linear}
partial differential equation
\begin{equation*}
\sum_{j=1}^{n}a_{j}(x,u)\frac{\partial u}{\partial x_{j}}=b(x,u).
\tag*{(***)}
\end{equation*}
subject to $u|_{S}=\phi$ where $S$ is a non-characteristic
hypersurface.
Here we assume that each $a_{j}$ is a real once continuously
differentiable function and that $b$ is a real
continuously
differentiable function. (Note that the technique
does not apply if $b$ is complex valued and that
a counter-example of Hans Lewy shows that a solution
may not exist.)
The solution technique relies on regarding $u$ as a variable
just like $x$. We give the technique as a recipe.
Suppose that $S$ is parameterised by a function $g$
so that
\[S=\{x=g(s)\, :\, s\in{\mathbb R}^{n-1}\}.\]
We work with the vector field
\[(a_{1},a_{2},\dots,a_{n},b)\]
on ${\mathbb R}^{n+1}$ and solve for the integral
curves
\begin{alignat*}{2}
\frac{dx_{s}}{dt}&=a(x_{s}(t),y_{s}(t))&&\qquad x_{s}(0)=g(s)\\
\frac{dy_{s}}{dt}&=b(x_{s}(t),y_{s}(t))&&\qquad y_{s}(0)=\phi(s).
\end{alignat*}
Our solution is basically $y(s,t)$ but we want it as
a function of $x$ not $(s,t)$. The map
\[(s,t)\mapsto x(s,t)\]
will have an invertible derivative at $t=0$ provided
that the vector
\[(a_{1}(g(s),\phi(s)),a_{2}(g(s),\phi(s)),\dots,a_{n}(g(s),\phi(s)))\]
is not tangent to $S$ and since we have specified that
$S$ is non-characteristic this will always be the case.
A theorem of differential geometry (proved in Course B4
Differential Manifolds, but in any case very plausible)
called the `inverse function theorem' asserts that
under these circumstances (continuous derivative
invertible at a point) the map
\[(s,t)\mapsto x(s,t)\]
is locally invertible. We may thus define
\[u(x)=y(s(x),t(x))\]
and direct calculation shows that it is a solution.
Checking reversibility we see that we have proved the following
theorem.
\begin{theorem}[Theorem 7]\label{quasi-linear}
Equation (***) has a unique solution locally.
\end{theorem}
Once again, working through a few examples will make the theory
much clearer.
\begin{example}[Example 3] Solve the partial
differential equation
\[u\frac{\partial u}{\partial x}+
\frac{\partial u}{\partial y}=1\]
subject to $u=s/2$ on $x=y=s$.
\end{example}
If a semi-linear equation (with real coefficients)
has real right hand side
it can be solved by the technique of this subsection
but if the right hand side is complex we must use
the previous method.
\section{Distributions}
\subsection{Introduction}
Distribution theory is a
synthesis of ideas coming from partial differential
equations (e.g. weak solutions), physics
(the Dirac delta function) and functional analysis
but the whole is much more than the sum of its parts.
We shall need the notion of the support
of a continuous function.
\begin{definition} If $f:{\mathbb R}^{n}\rightarrow{\mathbb C}$
is continuous we define the support $\supp f$ of $f$
by
\[\supp f=\text{\rm closure}\{x\, :\, f(x)\neq 0\}.\]
\end{definition}
\begin{lemma} If $f:{\mathbb R}^{n}\rightarrow{\mathbb C}$
is continuous the support of $f$ is compact if and
only if there exists an $R\geq 0$ such that
$f(x)=0$ if $\|x\|>R$.
\end{lemma}
Remember that, for ${\mathbb R}^{n}$,
a set is compact if and only if it is closed and bounded.
We use the space of \emph{test functions}
${\mathcal D}=C_{0}^{\infty}({\mathbb R}^{n})$
consisting of smooth (that is, infinitely
differentiable) functions of
compact support. This is a very restricted
space of functions but fortunately non-trivial.
\begin{lemma}[Lemma 1]\label{bump} (i) If we define
$E:{\mathbb R}\rightarrow{\mathbb R}$ by
\begin{alignat*}{2}
E(t)&=\exp(-1/t^{2})&&\qquad\text{for $t>0$}\\
E(t)&=0&&\qquad\text{otherwise}
\end{alignat*}
then $E$ is infinitely differentiable.
(ii) Given $\delta>\eta>0$ we can find
$F:{\mathbb R}\rightarrow{\mathbb R}$
such that
\begin{alignat*}{2}
1\geq F(t)&\geq 0&&\qquad\text{for all $t$}\\
F(t)&=1&&\qquad\text{if $|t|\leq\eta$}\\
F(t)&=0&&\qquad\text{if $|t|\geq\delta$}.
\end{alignat*}
(iii) Given $\delta>\eta>0$ we can find
$G:{\mathbb R}^{n}\rightarrow{\mathbb R}$
such that
\begin{alignat*}{2}
1\geq G(x)&\geq 0&&\qquad\text{for all $x$}\\
G(x)&=1&&\qquad\text{if $\|x\|\leq\eta$}\\
G(x)&=0&&\qquad\text{if $\|x\|\geq\delta$}.
\end{alignat*}
\end{lemma}
Functions like $G$ are called bump functions.
We equip ${\mathcal D}$ with a notion of convergence.
\begin{definition} Suppose that $f_{j}\in{\mathcal D}$
for each $j$ and $f\in{\mathcal D}$. We say that
\[f_{j}\underset{{\mathcal D}}{\rightarrow}f\]
if the following two conditions hold.
(i) There exists an $R$ such that $\supp f_{j}\subseteq B(0,R)$
for all $j$.
(ii) For each $\alpha$ we have
$\partial^{\alpha} f_{j}\rightarrow \partial^{\alpha} f$
uniformly as $j\rightarrow\infty$.
\end{definition}
This is a definition that needs to be thought about
carefully.
We can now define a distribution to be a linear map
$T:{\mathcal D}\rightarrow{\mathbb C}$ which is continuous
in the sense that
\[f_{n}\underset{{\mathcal D}}{\rightarrow}f
\ \text{implies}\ Tf_{n}\rightarrow Tf.\]
We write ${\mathcal D}'$ for the set of distributions.
We shall often write
\[Tf=\langle T, f\rangle.\]
\begin{lemma} The set ${\mathcal D}'$ is a vector
space if we use the natural definitions
\[\langle T+S,f\rangle=\langle T,f\rangle+\langle S,f\rangle,
\ \langle \lambda T,f\rangle=\lambda\langle T,f\rangle.\]
\end{lemma}
Our first key insight is the following.
\begin{lemma} If $g\in C({\mathbb R}^{n})$ and we define
$T_{g}$ by
\[\langle T_{g},f\rangle=\int_{{\mathbb R}^{n}}g(t)f(t)\,dt\]
we have $T_{g}\in {\mathcal D}'$.
\end{lemma}
We usually write $T_{g}=g$, i.e.
\[\langle T_{g},f\rangle=\int_{{\mathbb R}^{n}}g(t)f(t)\,dt\]
and say that every continuous function is a distribution.
(Hence the name `generalised function' sometimes given
to distributions.)
The second insight is more in the nature of a
recipe than a theorem.
\begin{lemma}[The standard recipe]\label{recipe}
Suppose that $A:{\mathcal D}\rightarrow{\mathcal D}$
is a linear map which is continuous in the sense that
\begin{equation*}
\tag{1}
f_{n}\underset{{\mathcal D}}{\rightarrow}f
\ \text{implies}\ Af_{n}\underset{{\mathcal D}}{\rightarrow}Af
\end{equation*}
and that
$A^{t}:{\mathcal D}\rightarrow{\mathcal D}$
is a linear map which is continuous in the sense that
\begin{equation*}
\tag{2}
f_{n}\underset{{\mathcal D}}{\rightarrow}f
\ \text{implies}\ A^{t}f_{n}\underset{{\mathcal D}}{\rightarrow}A^{t}f.
\end{equation*}
Suppose in addition
\begin{equation*}
\tag{3}
\langle Ag,f\rangle=\langle g,A^{t}f\rangle
\end{equation*}
Then, if $S$ is any distribution
we can define a distribution $T_{A}S$ by the equation
\[\langle T_{A}S,f\rangle=\langle S,A^{t}f\rangle.\]
The mapping $T_{A}:{\mathcal D}'\rightarrow {\mathcal D}'$
is linear and
whenever $g\in {\mathcal D}$ we will have $Ag=T_{A}g$.
\end{lemma}
We usually write $T_{A}=A$, so that
\[\langle AT,f\rangle=\langle T,A^{t}f\rangle.\]
\begin{exercise} Although we shall not use this in the
course, convergence in ${\mathcal D}'$ is defined as follows.
If $S_{j}$ $[j\geq 1]$ are distributions we say that
$S_{j}\underset{{\mathcal D}'}{\rightarrow} S$ if
\[\langle S_{j},f\rangle\rightarrow \langle S,f\rangle\]
for all $f\in{\mathcal D}$. Show that
if $A$ satisfies the conditions of Lemma~\ref{recipe},
the map $T_{A}:{\mathcal D}'\rightarrow {\mathcal D}'$
is continuous in the sense that
\[S_{j}\underset{{\mathcal D}'}{\rightarrow} S
\ \text{implies}
\ T_{A}S_{j}\underset{{\mathcal D}'}{\rightarrow} T_{A}S.\]
\end{exercise}
\begin{lemma}\label{various}
If $T$ is a distribution we can define
associated distributions by following Lemma~\ref{recipe}
as follows.
(i) (Multiplication by a smooth function.) If
$\psi\in C^{\infty}({\mathbb R}^{n})$
\[\langle \psi T,f\rangle =\langle T,\psi f\rangle\]
for all $f\in{\mathcal D}$.
(ii) (Differentiation)
\[\langle \partial_{x_{j}} T,f\rangle
=-\langle T,\partial_{x_{j}} f\rangle\]
for all $f\in{\mathcal D}$.
(iii) (Reflection) If we write $Rf(x)=f(-x)$ then
\[\langle R T,f\rangle =\langle T,R f\rangle\]
for all $f\in{\mathcal D}$.
(iv) (Translation) If we write $\tau_{a}f(x)=f(x-a)$ then
\[\langle \tau_{a} T,f\rangle =\langle T,\tau_{-a} f\rangle\]
for all $f\in{\mathcal D}$.
\end{lemma}
Here is another example along the same lines.
\begin{lemma}
If $A$ is a linear map on
${\mathbb R}^{n}$ then it induces a linear
change of coordinate map on functions $A^{*}$ by
the formula
\[A^{*}f(x)=f(Ax).\]
If $A$ is invertible we can define a transpose
map $(A^{*})^{t}$ by $(A^{*})^{t}=|\det A|^{-1}(A^{-1})^{*}$
i.e. by
\[((A^{*})^{t}(f))(x)=|\det A|^{-1}f(A^{-1}x).\]
If $T$ is a distribution we can define an
associated distribution (i.e. obtain a change
of variables result for distributions)
by following Lemma~\ref{recipe} to obtain
\[\langle A^{*}T,f\rangle=\langle T,|\det A|^{-1}(A^{-1})^{*}f\rangle.\]
\end{lemma}
[Joshi and Wassermann think in terms of `signed areas'
and have $\det A$ where we have $|\det A|$ in accordance
with the conventions of applied mathematics.]
As an indication that we are on the right road,
the next example shows consistency with the
Dirac deltas of mathematical methods in 1B.
\begin{example} (i) If we write
\[\langle \delta_{a} ,f\rangle =f(a)\]
for all $f\in{\mathcal D}$ then $\delta_{a}$
is a distribution.
(ii) With the notation of Lemma~\ref{various}~(iv),
\[\tau_{b}\delta_{a}=\delta_{a+b}.\]
(iii) We have
\[\langle \partial_{x_{j}} \delta_{a},f\rangle
=-(\partial_{x_{j}} f)(a)\]
for all $f\in{\mathcal D}$.
(iv) If we work on ${\mathbb R}$ and define the
Heaviside function $H:{\mathbb R}\rightarrow{\mathbb R}$
by
\begin{alignat*}{2}
H(t)&=1&&\qquad\text{for $t\geq 0$,}\\
H(t)&=0&&\qquad\text{otherwise}
\end{alignat*}
then, in the distributional sense, $H'=\delta$.
\end{example}
\subsection{The support of a distribution}
We start with a definition.
\begin{definition} If $T\in{\mathcal D}'$ then the
support $\supp T$ of $T$ is defined as follows.
A point $x\notin\supp T$ if we can find an
open set $U$ such that $x\in U$ and whenever
$f\in{\mathcal D}$ is such that $\supp f\subseteq U$
we have $\langle T,f\rangle=0$.
\end{definition}
\begin{exercise}
(i) Check that if $f$ is a continuous
function its support as a continuous function
is the same as its support when it is considered
as a distribution.
(ii) Show that if $T$ is a distribution $\supp T$ is closed.
\end{exercise}
The following theorem is extremely important
as is the method (partition of unity) by which
we obtain it.
\begin{theorem}\label{support exists}
If $f\in{\mathcal D}$ and $T\in{\mathcal D}'$
then
\[\supp T\cap \supp f=\emptyset
\ \text{implies}\ \langle T,f\rangle=0.\]
\end{theorem}
In my opinion the partition of unity is more
an idea rather than a theorem but here
is one form expressed as a theorem.
\begin{exercise}[Theorem 12] Let $K$ be a compact
subset of ${\mathbb R}^{n}$ and $U_{1}$, $U_{2}$,
\dots $U_{m}$ open sets in ${\mathbb R}^{n}$ such
that $K\subseteq \bigcup U_{j}$. Then we can find
$f_{j}\in{\mathcal D}$ with $0\leq f_{i}(x)\leq 1$
for all $x$, $\supp f_{j}\subseteq U_{j}$
and $\sum f_{j}(x)=1$ for $x\in K$,
$\sum f_{j}(x)\leq1$ everywhere.
\end{exercise}
When looking at Theorem~\ref{support exists}
you should keep the following important example in mind.
\begin{exercise} We work in ${\mathbb R}$.
(i) Show that there exists an $f\in{\mathcal D}$
with $f(0)=0$, $f'(0)=1$.
(ii) Observe that $f(x)=0$ when $x\in\supp\delta_{0}$
but $\langle \delta_{0}',f\rangle=-1$.
(iii) Why does this not contradict
Theorem~\ref{support exists}?
\end{exercise}
The support tells us where a distribution lives.
A related concept is the singular support.
\begin{definition} If $T\in{\mathcal D}'$ then the
singular support $\singsupp T$ of $T$ is defined as follows.
A point $x\notin\singsupp T$ if we can find an
$f\in{\mathcal D}$ such that $x\notin\supp (T-f)$.
\end{definition}
A careful argument using partitions of unity
shows that if $\singsupp T=\emptyset$ then $T$
is in fact a smooth function.
When working with distributions it is helpful to
recall the following results.
\begin{lemma} If $T$ is a distribution and $\partial_{j}T=0$
for all $j$ then $T=c$ where $c$ is a constant.
\end{lemma}
(We shall not prove this plausible result but like
much else it may be found in Friedlander's beautiful
little book, see section 2.1~\cite{Friedlander}.)
\begin{lemma}[Theorem 28, A version of Taylor's theorem]%
\label{Taylor one}
If $f\in C^{\infty}({\mathbb R})$
and $f(0)=f'(0)=\dots=f^{(m-1)}(0)=0$ then we can find
$g\in C^{\infty}({\mathbb R})$ such that
\[f(x)=x^{m}g(x)\]
for all $x$.
\end{lemma}
There is no difficulty (apart from notation) in extending
Lemma~\ref{Taylor one} to higher dimensions.
In the next section we shall make use
of the following special case.
\begin{lemma}\label{Taylor many}
If $f\in C^{\infty}({\mathbb R}^{n})$
and $f(0)=0$ then we can find
$g_{j}\in C^{\infty}({\mathbb R}^{n})$ such that
\[f(x)=\sum g_{j}x_{j}\]
for all $x$.
\end{lemma}
As an example of the use of Lemma~\ref{Taylor one}
we do the following exercise.
\begin{exercise} Show that the general solution of
\[xT=0\]
is $T=c\delta_{0}$.
\end{exercise}
\subsection{Fourier transforms and the Schwartz space}
We would like to take Fourier
transforms of distributions but if we look at the recipe
of Lemma~\ref{recipe} we see that it fails since
$f\in{\mathcal D}$ does not imply $\hat{f}\in{\mathcal D}$.
(In fact, unless $f=0$, $f\in{\mathcal D}$
implies $\hat{f}\notin{\mathcal D}$ but we do not need
this result.)
Schwartz introduced another space of functions
${\mathcal S}({\mathbb R}^{n})$ which behaves extraordinarily
well under Fourier transforms.
\[{\mathcal S}({\mathbb R}^{n})
=\{f\in C^{\infty}({\mathbb R}^{n})\, :
\, \sup_{x}(1+\|x\|)^{m}|\partial^{\alpha}f(x)|<\infty
\ \text{for all $m\geq 0$ and $\alpha$}\}.\]
Thus ${\mathcal S}$ `consists of all infinitely
differentiable functions all of whose derivatives
tend to zero faster than any polynomial towards infinity'.
Since ${\mathcal S}\supseteq {\mathcal D}$ we have
a plentiful supply of such functions.
If $f\in{\mathcal S}({\mathbb R}^{n})$ then the Fourier transform
is given by
\[\hat{f}(\xi)=\left(\frac{1}{2\pi}\right)^{n/2}
\int_{{\mathbb R}^{n}}e^{-ix.\xi}f(x)\,dx.\]
We sometimes write ${\mathcal F}f=\hat{f}$.
We also write
\[D_{j}f=-i\frac{\partial\ }{\partial x_{j}}\]
and abusing notation take $x_{j}f$ to be the function
\[x\mapsto x_{j}f(x).\]
It is clear that
\begin{align*}
D_{j}:{\mathcal S}({\mathbb R}^{n})&\rightarrow
{\mathcal S}({\mathbb R}^{n})\\
x_{j}:{\mathcal S}({\mathbb R}^{n})&\rightarrow
{\mathcal S}({\mathbb R}^{n})
\end{align*}
Familiar computations now give our first result.
\begin{lemma}[Lemma 2]\label{familiar}
The Fourier transform
$f\mapsto \hat{f}$ takes
${\mathcal S}({\mathbb R}^{n})$ to
${\mathcal S}({\mathbb R}^{n})$ and
\begin{align*}
\widehat{D_{j}f}&=\xi_{j}\hat{f}\\
\widehat{x_{j}f}&=-D_{j}\hat{f}
\end{align*}
\end{lemma}
Lemma~\ref{familiar} can be used to give a neat
proof of the Fourier inversion formula for ${\mathcal S}$.
We use the following lemma.
\begin{lemma}[Lemma 4]\label{clever chaps}
If $T:{\mathcal S}({\mathbb R}^{n})\rightarrow
{\mathcal S}({\mathbb R}^{n})$ is a linear map which
commutes with $x_{j}$ and $D_{j}$ for all $j$
then, writing $I$ for the identity map,
\[T=cI\]
for some constant $c$.
\end{lemma}
Our proof of Lemma~\ref{clever chaps} depends on a slight
improvement of the Taylor series result Lemma~\ref{Taylor many}.
\begin{lemma}[Corollary 2] If $f\in {\mathcal S}({\mathbb R}^{n})$
and $f(a)=0$ then we can find
$f_{j}\in {\mathcal S}({\mathbb R}^{n})$
such that
\[f(x)=\sum_{j=1}^{n}(x_{j}-a_{j})f_{j}(x)\]
for all $x$.
\end{lemma}
We can now prove our inversion theorem.
\begin{theorem}\label{Fourier inverse}
If $R:{\mathcal S}({\mathbb R}^{n})\rightarrow
{\mathcal S}({\mathbb R}^{n})$ is the map
given by $Rf(x)=f(-x)$ then
\[{\mathcal F}^{2}=R.\]
\end{theorem}
Observe that this means that ${\mathcal F}^{4}=I$
so ${\mathcal F}$ is invertible.
Stating our result in a more traditional but equivalent manner
we have the following theorem.
\begin{theorem}[Theorem 9] The Fourier transform
\[f\mapsto \hat{f}\]
is an isomorphism of ${\mathcal S}$ onto itself
with inverse given by
\[f(x)=\left(\frac{1}{2\pi}\right)^{n/2}
\int_{{\mathbb R}^{n}}e^{ix.\xi}\hat{f}(\xi)\,d\xi.\]
\end{theorem}
If $f,g\in{\mathcal S}$, or more generally if the
integral makes sense, we define the convolution
$f*g$ by
\[f*g(x)=\int_{{\mathbb R}^{n}}f(x-t)g(t)\,dt.\]
The standard results that we recall from mathematical methods hold.
\begin{lemma}[Theorem 10]\label{Parseval}
If $f,g\in{\mathcal S}$
(i) $\int f\hat{g}=\int \hat{f} g$,
(ii) $\int f g^{*}=\int \hat{f}(\hat{g})^{*}$, (Parseval)
(iii) $\widehat{f*g}=\hat{f}\hat{g}$,
(iv) $\widehat{fg}=\hat{f}*\hat{g}$,
(v) $f*g\in{\mathcal S}$.
\end{lemma}
\subsection{Tempered distributions} Just as we constructed a
space of distributions ${\mathcal D}'$ using functions from
${\mathcal D}$
as \emph{test functions} so we can construct a
space of distributions ${\mathcal S}'$ using functions from
${\mathcal S}$ as test functions.
Of course, we need to equip ${\mathcal S}$ with a notion of convergence.
\begin{definition} Suppose that $f_{j}\in{\mathcal S}$
for each $j$ and $f\in{\mathcal S}$. We say that
\[f_{j}\underset{{\mathcal S}}{\rightarrow}f\]
if for each $m\geq 0$ and $\alpha$ we have
$(1+\|x\|)^{m}\partial^{\alpha} (f_{j}(x)-f(x))\rightarrow 0$
uniformly as $j\rightarrow\infty$.
\end{definition}
We now define $T\in{\mathcal S}'$ to be a linear map
$T:{\mathcal S}\rightarrow{\mathbb C}$ which is continuous
in the sense that
\[f_{n}\underset{{\mathcal S}}{\rightarrow}f
\ \text{implies}\ Tf_{n}\rightarrow Tf.\]
As before
we shall often write
\[Tf=\langle T, f\rangle.\]
We can now develop the theory of \emph{tempered
distributions} $T\in{\mathcal S}'$ just as we
did that of distributions $T\in{\mathcal D}'$.
Using results in the previous
subsection it is easy to check that
${\mathcal F}:{\mathcal S}\rightarrow{\mathcal S}$
is a linear map which is continuous in the sense that
\begin{equation*}
\tag{1}
f_{n}\underset{{\mathcal S}}{\rightarrow}f
\ \text{implies}\ {\mathcal F}f_{n}
\underset{{\mathcal S}}{\rightarrow}{\mathcal F}f
\end{equation*}
and that
\begin{equation*}
\tag{3}
\langle {\mathcal F}g,f\rangle=\langle g,{\mathcal F}f\rangle
\end{equation*}
for any $f,g\in{\mathcal S}$. (Thus, in the language
of Lemma~\ref{recipe}, ${\mathcal F}^{t}={\mathcal F}$.)
Thus, applying the result corresponding to
Lemma~\ref{recipe} if $T\in{\mathcal S}'$
we can define ${\mathcal F}T$ by the equation
\[\langle {\mathcal F}T,f\rangle=\langle T,{\mathcal F}f\rangle,\]
or in the obvious corresponding notation
\[\langle \hat{T},f\rangle=\langle T,\hat{f}\rangle.\]
There is no difficulty in proving results corresponding to
Lemma~\ref{familiar}.
\begin{lemma} (i) If $R:{\mathcal S}'({\mathbb R}^{n})\rightarrow
{\mathcal S}'({\mathbb R}^{n})$ is the extension to
${\mathcal S}'$ of the reflection function on ${\mathcal S}$
given by $Rf(x)=f(-x)$ then, working in ${\mathcal S}'$
\[{\mathcal F}^{2}=R.\]
(ii) The Fourier transform
\[f\mapsto \hat{f}\]
is an isomorphism of ${\mathcal S}'$ onto itself
with inverse given by ${\mathcal F}^{3}$.
\[f(x)=\left(\frac{1}{2\pi}\right)^{n/2}
\int_{{\mathbb R}^{n}}e^{ix.\xi}\hat{f}(\xi)\,d\xi\]
(iii) If $T\in {\mathcal S}'$ then, in an obvious notation,
\begin{align*}
\widehat{D_{j}T}&=\xi_{j}\hat{T}\\
\widehat{x_{j}T}&=-D_{j}\hat{T}
\end{align*}
\end{lemma}
However we do not have to develop the theory of ${\mathcal S}'$
and ${\mathcal D}'$ separately since ${\mathcal D}$ sits
nicely in ${\mathcal S}$ and so ${\mathcal S}'$ sits
nicely in ${\mathcal D}'$.
\begin{theorem} (i) We have ${\mathcal D}\subseteq{\mathcal S}$.
Moreover if $f_{j}\in{\mathcal D}$
for each $j$ and $f\in{\mathcal D}$ then
\[f_{j}\underset{{\mathcal D}}{\rightarrow}f
\ \text{implies}\ f_{j}\underset{{\mathcal S}}{\rightarrow}f\]
(ii) We may consider any element $T\in{\mathcal S}'$
as an element of ${\mathcal D}'$ in a natural manner.
\end{theorem}
\begin{theorem}[Theorem 13] (i) Given any $f\in{\mathcal S}$
we may find a sequence $f_{j}\in{\mathcal D}$ such that
\[f_{j}\underset{{\mathcal S}}{\rightarrow}f\]
as $j\rightarrow\infty$.
(ii) If $T\in{\mathcal S}'$ and $\langle T,f\rangle=0$
whenever $f\in{\mathcal D}$ then $\langle T,f\rangle=0$
whenever $f\in{\mathcal S}$.
(iii) Let $T,\ S\in{\mathcal S}'$. If $T$ and $S$ are
unequal when considered as members of ${\mathcal S}'$
they remain unequal when considered as members
of ${\mathcal D}'$.
\end{theorem}
Thus we may, and shall consider ${\mathcal S}'$ as
a subspace of ${\mathcal D}'$. When we talk about
distributions we shall mean members of ${\mathcal D}'$.
When we talk about \emph{tempered distributions}
we shall mean members of ${\mathcal S}'$.
\begin{example}[Example 6]~\label{transform delta}
The delta function $\delta_{0}$
is a tempered distribution. Its Fourier transform
$\hat{\delta_{0}}$ is the constant function $(2\pi)^{-n/2}$.
The constant function $1$ is a tempered distribution.
Its Fourier transform
$\hat{1}$ is $(2\pi)^{n/2}\delta_{0}$.
\end{example}
The way we have defined $\hat{T}$ by Lemma~\ref{recipe}
ensures that
\[\text{distributional Fourier transform}(f)
=\text{classical Fourier transform}(f)\]
whenever $f\in{\mathcal S}$. In fact more is true.
\begin{lemma}[Simple version of Proposition 2]
If $f$ is continuous
and $\int |f(x)|\,dx<\infty$ then
\[\text{\textrm{distributional Fourier transform}}(f)
=\text{\textrm{classical Fourier transform}}(f)\]
i.e.
\[\hat{f}(\xi)=\left(\frac{1}{2\pi}\right)^{n/2}
\int_{{\mathbb R}^{n}}e^{-ix.\xi}f(x)\,dx.\]
\end{lemma}
We shall also need the following simple result
which the reader may already have met.
\begin{lemma} If $f$ is continuous
and $\int |f(x)|\,dx<\infty$ then
$\hat{f}$ is continuous and bounded.
\end{lemma}
As a corollary we have the following useful lemma.
\begin{lemma}[Corollary 5] If $q$ is smooth and
\[|q(\xi)|\leq C((1+\|\xi\|^{2})^{1/2})^{-n-l-\epsilon}\]
for some $\epsilon>0$ then the Fourier transform
and inverse Fourier transform of $q$ are $C^{l}$.
\end{lemma}
\section{Convolution and fundamental solutions} In this chapter we
shall return at last to the topic of partial differential
equations but first we need to study convolution
of distributions.
\subsection{Convolution}
Unfortunately the notion
of convolution of distributions is not as `clean'
as the rest of the theory but it is essential.
Recall that if $f$ and $g$ are `nice functions'
(e.g. if they are both Schwartz functions)
we define the convolution
$f*g$ by
\[f*g(x)=\int_{{\mathbb R}^{n}}f(x-t)g(t)\,dt.\]
Convolution plays an essential role in
probability theory, signal processing,
linear partial differential equations (as
we shall see) and many other parts of analysis.
We proved in Lemma~\ref{Parseval}~(iii) that
$\widehat{f*g}=\hat{f}\hat{g}$, and it can be
argued that the importance of the Fourier transform
is that it converts convolution into multiplication.
However if we try out this formula on the tempered
distribution $1$ we get
\[\widehat{1*1}=\hat{1}\hat{1}=(2\pi)^{n}\delta_{0}\delta_{0}\]
an `equation' in which neither side makes sense
(at least within distribution theory as developed here).
\emph{In general we can not multiply two distributions
and we can not convolve two distributions.}
Having said this, there are many circumstances in which it
is possible to convolve two distributions. To see this
observe that under favourable circumstances,
if $f$, $g$ and $h$ are all nice functions
\begin{align*}
\langle f*g,h\rangle
&=\int\left(\int_{{\mathbb R}^{n}}f(s-y)g(y)\,dy\right)h(s)\,ds\\
&=\int\int f(s-y)g(y)h(s)\,dy\,ds\\
&=\int\int f(s-y)g(y)h(s)\,ds\,dy\\
&=\int\int f(x)g(y)h(x+y)\,dx\,dy\\
&=\langle f(x),\langle g(y),h(x+y)\rangle\rangle.
\end{align*}
We shall attempt to generalise this. Our treatment
is not quite as general as that given by
Joshi and Wassermann in section~4.4 of their
notes but follows the treatment given by
Friedlander in his book~\cite{Friedlander}.
The underlying idea
is identical and either treatment can be
used in examination questions.
Looking at the discussion of convolution in `favourable
circumstances' we observe that we have made use of the
`exterior product'
$(f,g)\mapsto f\otimes g$ where
\[f\otimes g(x,y)=f(x)g(y).\]
We observe that if $f$, $g$ and $f\otimes g$ are considered as
distributions then
\begin{equation*}
\tag*{\dag}
\langle f\otimes g,h\rangle=\int\int f(x)g(y)h(x,y)\,dx\,dy.
\end{equation*}
The exterior product carries over easily to distributions.
If $S\in{\mathcal D}'({\mathbb R}^{n})$ and
$T\in{\mathcal D}'({\mathbb R}^{m})$ we set
\[\langle S\otimes T,h\rangle=\langle S,\langle T,h_{x}\rangle\rangle\]
where $h_{x}(y)=h(x,y)$ and $h\in{\mathcal D}({\mathbb R}^{n+m})$.
Using dummy variables
\[\langle S\otimes T(x,y),h(x,y)\rangle
=\langle S(x),\langle T(y),h(x,y)\rangle\rangle.\]
\begin{lemma} With the notation and hypotheses of the
previous paragraph, $S\otimes T$ is a well defined member
of ${\mathcal D}'({\mathbb R}^{m+n})$.
\end{lemma}
Suppose now $S,\ T\in{\mathcal D}'({\mathbb R}^{n})$.
If $h\in{\mathcal D}({\mathbb R}^{n})$ the discussion
of convolution in `favourable circumstances' suggests
that we should look at the function
$\tilde{h}:{\mathbb R}^{2n}\rightarrow{\mathbb C}$
given by $\tilde{h}(x,y)=h(x+y)$ and apply $S\otimes T$ to
$\tilde{h}$.
Unfortunately, although $\tilde{h}\in C^{\infty}({\mathbb R}^{2n})$,
$\tilde{h}$ is not of compact support unless $h=0$.
\begin{exercise} Prove the two statements made in
the last sentence.
\end{exercise}
So far we have only allowed elements of ${\mathcal D}'$
to operate on elements of ${\mathcal D}$ so we appear to be stuck.
However, \emph{under certain circumstances},
elements of ${\mathcal D}'$ will operate on more general
$C^{\infty}$ functions in a natural manner.
\begin{lemma}\label{diagonal}
(i) Suppose $T\in{\mathcal D}'({\mathbb R}^{n})$
and $f\in C^{\infty}({\mathbb R}^{n})$. If
$\supp f\cap\supp T$ is compact then there is a natural
definition of $\langle T,f\rangle$.
(ii) Suppose $T\in{\mathcal D}'({\mathbb R}^{n})$
and $f_{j}\in C^{\infty}({\mathbb R}^{n})$. Suppose further
that there exists a compact set $K$ such that
$K\supseteq \supp f_{j}\cap\supp T$ for all $j$.
Then if $\partial^{\alpha}f_{j}\rightarrow 0$
uniformly on $K$ for each $\alpha$ it follows
that $\langle T,f_{j}\rangle\rightarrow 0$ as $j\rightarrow\infty$.
\end{lemma}
We have arrived at our goal.
\begin{theorem}\label{define convolution}
If $S,\ T\in{\mathcal D}'({\mathbb R}^{n})$
and $\supp S$ or $\supp T$ is compact then we may define
$S*T$ as follows. If $h\in{\mathcal D}({\mathbb R}^{n})$
define $\tilde{h}:{\mathbb R}^{2n}\rightarrow{\mathbb C}$
by $\tilde{h}(x,y)=h(x+y)$, the expression
$\langle S\otimes T,\tilde{h}\rangle$ is well defined
in the sense of Lemma~\ref{diagonal} so we set
\[\langle S*T, h\rangle=\langle S\otimes T,\tilde{h}\rangle.\]
The map $S*T$ so defined is a distribution.
\end{theorem}
\begin{lemma} With the notation and hypotheses
of Theorem~\ref{define convolution}
\[\supp S*T\subseteq \supp S+\supp T.\]
\end{lemma}
(Here $\supp S+\supp T=\{x+y\,:\, x\in \supp S,\ y\in\supp T\}$.)
Note that if the hypotheses of Theorem~\ref{define convolution}
hold then using dummy variables
we have the elegant formula
\[\langle S*T(t),f(t)\rangle=\langle S(x),\langle T(y),f(x+y)\rangle\rangle.\]
The hypotheses of Theorem~\ref{define convolution} can
clearly be relaxed somewhat, so Joshi and Wassermann refer
to `convolvable distributions'.
We summarise some of the useful facts about convolutions.
\begin{lemma} (i) If $T$ and $S$ are distributions at least one
of which has compact support then
\[T*S=S*T.\]
(ii) We recall that $\delta_{0}$ has compact support.
If $T$ is any distribution
\[\delta_{0}*T=T.\]
(iii) If $R$, $T$ and $S$ are distributions at least two
of which have compact support then
\[R*(S*T)=(R*S)*T.\]
(iv) If $T$ and $S$ are distributions at least one
of which has compact support then
\[\partial^{\alpha}(T*S)=(\partial^{\alpha}T)*S
=T*(\partial^{\alpha}S).\]
\end{lemma}
Note that this gives
$\partial^{\alpha}u=\partial^{\alpha}\delta_{0}*u$,
so applying a differential operator is equivalent
to convolving with some distribution.
One of the key facts about convolution is that
it smooths.
\begin{lemma}\label{differentiate convolution}
If $T$ is a distribution and $f$
an $m$ times continuously differentiable function
and at least one of the two has
compact support then $\langle T_{y},f(x-y)\rangle$
is a function and, as distributions,
\[\langle T_{y},f(x-y)\rangle=T*f(x).\]
Thus $T*f$ is
an $m$ times continuously differentiable function.
\end{lemma}
We shall not need the next result so I leave it as an exercise
but the discussion would seem incomplete without it.
\begin{exercise} (i) If $T$ is a distribution of compact support
then, using the extension of Lemma~\ref{diagonal},
\[\hat{T}(\xi)=(2\pi)^{-n/2}\langle T(x),\exp(i \xi.x)\rangle.\]
Further $\hat{T}$ is a continuous function.
(ii) If $S$ and $T$ are distributions of compact support
\[\widehat{S*T}(\xi)=\widehat{T}(\xi)\widehat{S}(\xi).\]
\end{exercise}
\subsection{Fundamental solutions} We now have the tools
to make substantial progress with the study
of linear partial differential equations
with constant coefficients. The key notion
is that of a fundamental solution.
\begin{definition}\label{fundamental}
A \emph{fundamental solution} of
$P(D)$ where $P(\xi_{1},\ \dots,\ \xi_{n})$ is
a polynomial with constant coefficients and
$P(D)=P(-i\partial_{1},\ \dots,\ -i\partial_{n})$
is a distributional solution, $T$, of
\[P(D)T=\delta_{0}.\]
\end{definition}
Note that we do not say that a fundamental solution is unique.
\begin{example}\label{simplest fundamental}
If $n=1$ and $P(x)=ix$ then
$E_{c}=H+c$ is a fundamental solution where $H$ is the Heaviside
function and $c$ is any constant.
\end{example}
The interest of fundamental solutions is shown by the next lemma.
\begin{lemma}\label{useful fundamental}
We use the notation of Definition~\ref{fundamental}
(i) If $T$ is a distribution of compact support and
$E$ is a fundamental solution of $P(D)$ then
$S=E*T$ is a solution of $P(D)S=T$.
(ii) If $f$ is an $m$ times continuously differentiable
function of compact support and
$E$ is a fundamental solution of $P(D)$ then
$S=E*f$ is an
$m$ times continuously differentiable
solution of $P(D)S=f$.
\end{lemma}
\begin{example}\label{next simplest} (i) We continue
with Example~\ref{simplest fundamental}.
Suppose $f$ is a continuous function of compact support
and we wish to solve
\[u'(x)=f(x).\]
Applying Lemma~\ref{useful fundamental} we obtain
\[u(x)=c\int_{-\infty}^{\infty}f(t)\,dt
+\int_{-\infty}^{x}f(t)\,dt.\]
(ii) Particular interest attaches to the two cases
$c=0$ when the solution is
\begin{equation}
\tag{A}
u(x)=\int_{-\infty}^{x}f(t)\,dt
\end{equation}
and $c=-1$ when the solution is
\begin{equation}
\tag{B}
u(x)=-\int_{x}^{\infty}f(t)\,dt
\end{equation}
since equation (A) produces a solution for $u'(x)=f(x)$
valid whenever $f(x)=0$ for $x$ large and positive and
equation (B) produces a solution for $u'(x)=f(x)$
valid whenever $f(x)=0$ for $x$ large and negative.
\end{example}
One of the points to note about Example~\ref{next simplest}~(ii)
is that it shows the advantage of extending the notion
of convolution as far as we can.
\begin{example} Suppose $S$ and $T$ are distributions on ${\mathbb R}$
such that we can find a real number $R$ with
\[\supp S,\ \supp T\subseteq (-R,\infty).\]
Explain in as much detail as you consider desirable
why we can define $S*T$.
\end{example}
It also shows that it may be desirable to consider
different fundamental solutions for different kinds of
problems. We can think of fundamental solutions
as inverses on different classes of functions in accordance
with the formula
\[E*P(D)=P(D)*E=\delta_{0}.\]
\subsection{Our first fundamental solution}
If $E$ is a fundamental solution of $P(D)$ then
by definition
\[P(D)E=\delta_{0}\]
so, provided that $E$ is a tempered distribution,
we may take Fourier transforms to obtain
\[P(\xi)\hat{E}(\xi)=(2\pi)^{-n/2},\]
so, at least formally,
\[\hat{E}(\xi)=\frac{1}{(2\pi)^{n/2}P(\xi)}.\]
If $P(\xi)$ is never zero the argument can
easily be reversed
in rigorous manner to give the following
theorem.
\begin{theorem}\label{Easy fundamental}
If $P$ is a polynomial with
no real zeros then there is a fundamental
solution given by the tempered distribution
$u$ with
\[\hat{u}(\xi)=\frac{1}{(2\pi)^{n/2}P(\xi)}.\]
There is only one fundamental
solution which is also a tempered distribution.
\end{theorem}
\begin{exercise} Show that Theorem~\ref{Easy fundamental}
applies to the operator $-\Delta+1$.
Use Theorem~\ref{Easy fundamental} to solve
the one dimensional case
\[-u''+u=\delta_{0}\]
and verify by direct calculation that
you have indeed got a solution.
\end{exercise}
A large part of the theory of partial differential
equations consists of different ways of dealing with the
zeros of $P(\xi)$.
\subsection{The parametrix}
If $P(D)$ is an elliptic operator, $P(\xi)$ may have real zeros
but they must lie in a bounded set. Using this we can obtain
something `almost as good' as a fundamental solution.
\begin{theorem}[Theorem 14]\label{parametrix}
If $P(D)$ is elliptic, $\phi$ a smooth function
of compact support identically $1$ on a neighbourhood
of the zero set of $P(\xi)$ then we can find a
tempered distribution $E$ with
\[\hat{E}(\xi)=\left(\frac{1}{2\pi}\right)^{n/2}
\frac{1}{P(\xi)}(1-\phi)(\xi).\]
We have
\[P(D)E=\delta_{0}+f\]
where $f\in{\mathcal S}({\mathbb R}^{n})$.
Also
\[\singsupp E\subseteq\{0\}.\]
\end{theorem}
\begin{lemma}\label{better parametrix}
If $P(D)$ is elliptic and $U$ a neighbourhood
of $0$ then we can find a
tempered distribution $E$ with $\supp E\subseteq U$,
$\singsupp E\subseteq\{0\}$,
and
\[P(D)E=\delta_{0}+f\]
where $f\in{\mathcal S}({\mathbb R}^{n})$.
\end{lemma}
We call a distribution like $E$ in Theorem~\ref{parametrix}
or Lemma~\ref{better parametrix} a parametrix.
It will, no doubt, have occurred to the reader that
if we solve a partial differential equation
\[P(D)u=f\]
in the distributional sense what we get is
a distribution $u$ and not necessarily
a classical function
even if $f$ is a classical function.
The next theorem removes this problem in certain cases.
\begin{theorem}[Theorem 15, special case of Weyl's lemma]%
\label{Weyl}
If $u\in{\mathcal D}'$ and $P(D)$ is an elliptic differential
operator then $u$ and $P(D)u$ have the same singular
support. In particular if $P(D)u$ is smooth so is $u$.
\end{theorem}
\subsection{Existence of the fundamental solution}
Except when we discussed first order equations,
this course has been about linear partial differential
equations with constant coefficients. It is
an important fact, which we shall not prove,
that in this case there is always a fundamental solution.
\begin{theorem}[Theorem 16]\label{fundamental exist}
If $P(\xi_{1},\ \dots,\ \xi_{n})$ is
a polynomial with constant coefficients and
$P(D)=P(-i\partial_{1},\ \dots,\ -i\partial_{n})$
then we can find a distribution $E$ such that
\[P(D)E=\delta_{0}.\]
\end{theorem}
A proof of Theorem~\ref{fundamental exist} is given
in section~10.4 of
Friedlander's book~\cite{Friedlander}. We shall
prove an important special case.
\begin{theorem}~\label{fundamental easy}
If $P(\xi_{1},\ \dots,\ \xi_{n})$ is
a polynomial with constant coefficients and there
exists an $a\in{\mathbb R}^{n}$ and an $\eta>0$
such that
\[|P(\xi+ia)|\geq \eta\]
for all $\xi\in{\mathbb R}^{n}$
then we can find a distribution $E$ such that
\[P(D)E=\delta_{0}.\]
\end{theorem}
We prove this result by contour pushing so we need
a result on analyticity and growth.
\begin{lemma}[Proposition 5.5, the Paley-Wiener estimate]
If $f\in{\mathcal D}({\mathbb R}^{n})$ then there
is a natural extension of $\hat{f}$ to a function
on ${\mathbb C}^{n}$ which is analytic in each
variable. If $\supp f\subseteq B(0,r)$ then
\[|\hat{f}(z)|\leq C_{N}(1+\|z\|)^{-N}e^{r|\Im z|}\]
for all $z$ and integers $N\geq 0$. Here $C_{N}$ is a constant
depending only on $N$ and $f$.
\end{lemma}
With the proof of Theorem~\ref{fundamental easy} we
come to the end of our general account of linear
constant coefficient partial differential equations.
For the rest of the course we shall study the
Laplacian, the heat and the wave operator.
We close this section by looking at the
heat and wave operator in the context of
Theorem~\ref{fundamental easy}.
\begin{example}[Example 5.7] If
\[P(D)=\frac{\partial\ }{\partial t}-
\sum\frac{\partial^{2}\ }{\partial x_{j}^{2}}\]
(the heat operator) then if $a<0$
\[|P(\tau+ia,\xi)|\geq -a\]
but if $a\geq 0$ there exists a $\xi\in{\mathbb R}^{n}$ such that
\[|P(\tau+ia,\xi)|=0.\]
\end{example}
\begin{example}[Example 5.8]\label{Fundamental wave}
If
\[P(D)=\frac{\partial^{2}\ }{\partial t^{2}}-
\sum\frac{\partial^{2}\ }{\partial x_{j}^{2}}\]
(the wave operator) then if $a\neq 0$
\[|P(\tau+ia,\xi)|^{2}\geq a^{4}>0\]
for $\tau\in{\mathbb R}$, $\xi\in{\mathbb R}^{n}$.
\end{example}
Note that our shifting contour argument gives one
fundamental solution for the heat operator
but two for the wave equation.
\section{The Laplacian}
\subsection{A fundamental solution}
The reader can probably guess a fundamental solution
for the Laplacian. We shall proceed more slowly in
order to introduce some useful ideas. Observe
first that by Theorem~\ref{Weyl} any
fundamental solution $E$ will have $\singsupp E\subseteq\{0\}$.
Since the Laplacian is rotationally invariant
an averaging argument shows (as one might fairly
confidently guess) that there must be a rotationally
invariant fundamental solution.
\begin{exercise}\label{quick Laplace}
We could now proceed as follows.
Suppose $f({\mathbf x})=g(r)$ with $r=\|{\mathbf x}\|$.
Show that
\[(\Delta f)({\mathbf x})=\frac{1}{r^{n-1}}
\frac{d\ }{dr}\left(r^{n-1}\frac{dg}{dr}(r)\right).\]
If $f$ is smooth and $\Delta f=0$ on
${\mathbb R}^{n}\setminus\{0\}$ find $f$.
\end{exercise}
Instead of following the path set out in
Exercise~\ref{quick Laplace} we use another
nice idea --- that of homogeneity.
The following remark is due to Euler.
\begin{lemma}\label{homogeneous Euler}
Let $f:{\mathbb R}^{n}\setminus\{0\}
\rightarrow{\mathbb R}$ be continuously differentiable
and $m$ be an integer.
The following statements are equivalent.
(i) $f(\lambda x)=\lambda^{m}f(x)\ \ \text{for all $\lambda>0$.}$
(In other words, $f$ is homogeneous of degree $m$.)
(ii) ${\displaystyle
\left(\sum_{j=1}^{n}x_{j}
\frac{\partial\ }{\partial x_{j}}
-m\right)f(x)=0.}$
\end{lemma}
In view of Lemma~\ref{homogeneous Euler}
we make the following definition.
\begin{definition}\label{homogeneous}
If $u\in{\mathcal D}'$ then $u$ is homogeneous
of degree $m\in{\mathbb C}$ if
\[\left(\sum_{j=1}^{n}x_{j}
\frac{\partial\ }{\partial x_{j}}
-m\right)u=0.\]
\end{definition}
\begin{lemma}\label{homogeneous results}
(i) The delta function $\delta_{0}$
on ${\mathbb R}^{n}$ is homogeneous of degree $-n$.
(ii) If $u\in{\mathcal D}'$ is homogeneous
of degree $m$ then $\partial_{x_{j}}u$
is homogeneous
of degree $m-1$.
\end{lemma}
Looking at Lemma~\ref{homogeneous results}
we guess that one fundamental solution $E_{n}$
of the Laplacian will be radially symmetric and homogeneous
of degree $2-n$ and we arrive at the guess
\[E_{n}=C_{n}\|x\|^{2-n}.\]
If $n\neq 2$ we shall now define $E_{n}=C_{n}\|x\|^{2-n}$
and verify that it is indeed a fundamental solution
for the appropriate choice of $C_{n}$.
\begin{lemma}\label{verify Laplace} Suppose $n\neq 2$.
(i) $E_{n}$ is a well defined
distribution according to our usual formula
\[\langle E_{n},f\rangle=\int_{{\mathbb R}^{n}}E_{n}(x)f(x)\,dx.\]
(ii) $\supp\Delta E_{n}\subseteq \{0\}$.
(iii) $\Delta E_{n}$ is a multiple of $\delta_{0}$.
(iv) If $C_{n}=((2-n)\omega_{n-1})^{-1}$ where
$\omega_{n-1}$ is the area of the surface of the
unit sphere in ${\mathbb R}^{n}$ then
$\Delta E_{n}=\delta_{0}$.
\end{lemma}
If $n=2$ we use previous knowledge to guess that
\[E_{2}({\mathbf x})=-\frac{1}{2\pi}\log \|x\|.\]
The verification that this is indeed a fundamental
solution follows the pattern of Lemma~\ref{verify Laplace}.
\begin{theorem}[Theorem 5.1]\label{Fundamental Laplace}
The Laplacian
on ${\mathbb R}^{n}$ has the fundamental solution
\[E(x)=\frac{\|x\|^{2-n}}{(2-n)\omega_{n-1}}\]
when $n\neq 2$ and
\[E(x)=-\frac{1}{2\pi}\log \|x\|\]
for $n=2$.
\end{theorem}
\subsection{Identities and estimates} The reader will
be familiar with the Gauss divergence theorem
and the associated Green's identities. There are
problems (or at least apparent problems) in
proving this for complicated domains but the standard
mathematical method proof is rigorous when applied
to the ball which is all we shall need.
\begin{theorem}[Theorem 5.2, Gauss divergence theorem]
Let $g:\overline{B(0,r)}\rightarrow{\mathbb R}^{n}$
be continuous on $\overline{B(0,r)}$
and continuously differentiable on $B(0,r)$.
Then
\[\int_{\|x\|<r}\nabla\cdot g(x)\,dx
=\int_{\|x\|=r}g(x)\cdot\frac{x}{\|x\|}\,dS(x).\]
\end{theorem}
Now let $g$ be a suitable function on the closed unit ball
$\overline{B}$ and let $\tilde{g}$ be its extension to
${\mathbb R}^{n}$ given by
\begin{alignat*}{2}
\tilde{g}(x)&=g(x)&&\qquad\text{for $\|x\|\leq 1$,}\\
\tilde{g}(x)&=0&&\qquad\text{for $\|x\|>1$}.
\end{alignat*}
If $E$ is the fundamental solution obtained earlier
we start by looking at $f_{1}=E*\tilde{g}$. We know that,
in a distributional sense,
\[\Delta E*\tilde{g}=\tilde{g}\]
but we wish to have a classical solution.
\begin{lemma}[Lemma 5.1]
(i) $f_{1}\in C^{1}({\mathbb R}^{n})$
and $\partial_{j}f_{1}=\partial_{j}E*\tilde{g}$.
(ii) There is a constant such that
$|f_{1}(x)|\leq A\|x\|^{2-n}$,
$|\partial_{j}f_{1}(x)|\leq A\|x\|^{1-n}$
for $\|x\|$ large.
\end{lemma}
\begin{lemma}[Lemma 5.2] We have $f_{1}$ infinitely
differentiable on ${\mathbb R}^{n}\setminus\{x\, :\, \|x\|=1\}$
and
\begin{alignat*}{2}
\Delta f_{1}&=g&&\qquad\text{for $\|x\|<1$}\\
\Delta f_{1}&=0&&\qquad\text{for $\|x\|>1$}.
\end{alignat*}
\end{lemma}
We thus have $\Delta f_{1}=g$ in $B$ but the boundary
conditions are wrong. To get round this we
use Kelvin's method of reflections. (If you
have done electrostatics you will recognise this
for dimension $n=2$.) We put
\[(Kf)(x)=\|x\|^{2-n}f\left(\frac{x}{\|x\|^{2}}\right).\]
\begin{lemma}\label{Solve Dirichlet}
If we set $f_{2}=Kf_{1}$ and $f=f_{1}-f_{2}$
then
(i) $\Delta f_{2}=0$ in $B$.
(ii) $f_{1}=f_{2}$ on $\partial B$.
(iii) $f$ solves our Dirichlet problem.
\end{lemma}
The proof of Lemma~\ref{Solve Dirichlet}~(i)
involves substantial calculation.
\begin{lemma}[Lemma 5.3]\label{substantial calculation}
(i) If $f$ is twice differentiable
\[\Delta(Kf)=\|x\|^{-4}K(\Delta f).\]
(ii) If $f$ is once differentiable and we write
$r\dfrac{\partial\ }{\partial r}=\sum x_{j}
\frac{\partial\ }{\partial x_{j}}$
we have
\[r\frac{\partial\ }{\partial r}(Kf)=(2-n)Kf
-K\left(r\frac{\partial f}{\partial r}\right).\]
\end{lemma}
Lemma~\ref{substantial calculation} shows us that
$f_{2}$ is harmonic in $B\setminus\{0\}$ but
a little more work is needed to establish
the full result.
\begin{lemma}[Lemma 5.4] The function $f_{2}$
is harmonic on $B$.
\end{lemma}
Doing the appropriate simple substitutions we obtain
our final result.
\begin{theorem}[Theorem 5.4]
We work in ${\mathbb R}^{n}$ with $n\geq 3$.
If we set
\[G(x,y)=\frac{\|x-y\|^{2-n}}{\omega_{n-1}(n-2)}
-\frac{\|x\|^{2-n}\|\|x\|^{-2}x-y\|^{2-n}}{\omega_{n-1}(n-2)}\]
then
\[f(x)=\int_{\|y\|<1}G(x,y)g(y)\,dy\]
solves the dual Dirichlet problem.
\end{theorem}
The reader will recognise $G(x,y)$ as a Green's function.
We shall not have time to do the case $n=2$ but
the reader will find no problem in redoing our arguments to
obtain the following.
\begin{theorem} We work in ${\mathbb R}^{2}$.
If we set
\[G(x,y)=\frac{1}{2\pi}\left(\log\|x-y\|
-\log\left(\|x\|\,\|\|x\|^{-2}x-y\|\right)\right)\]
then
\[f(x)=\int_{\|y\|<1}G(x,y)g(y)\,dy\]
solves the dual Dirichlet problem.
\end{theorem}
\section{Dirichlet's problem for the ball and Poisson's formula}
We noted earlier that a solution of the dual Dirichlet
problem will give us a solution for Dirichlet's problem.
\begin{lemma} Consider the Dirichlet problem
for the unit ball $B$.
\begin{alignat*}{2}
\Delta f&=0&&\qquad\text{for $\|x\|<1$}\\
f&=h&&\qquad\text{for $\|x\|=1$}
\end{alignat*}
with $h$ infinitely differentiable.
If $n\geq 3$ the solution is given by
\[f(x)=\int_{\|y\|=1}P(x,y)h(y)\,dS(y)\]
where ${\displaystyle P(x,y)=\frac{d\ }{dn}G(x,y)}$ the
directional derivative along the outward normal.
\end{lemma}
\begin{lemma}\label{First Poisson}
Consider the Dirichlet problem
for the unit ball $B$.
\begin{alignat*}{2}
\Delta f&=0&&\qquad\text{for $\|x\|<1$}\\
f&=h&&\qquad\text{for $\|x\|=1$}
\end{alignat*}
with $h$ infinitely differentiable.
If $n\geq 2$ the solution is given by
\[f(x)=\int_{\|y\|=1}P(x,y)h(y)\,dS(y)\]
where
\[P(x,y)=\frac{1}{\omega_{n-1}}\frac{1-\|x\|^{2}}{\|x-y\|^{n}}\]
\end{lemma}
We call $P$ the Poisson kernel.
We can improve Lemma~\ref{First Poisson}
and provide a proof of the improvement which is
essentially
independent of the work already done.
\begin{lemma}\label{Second Poisson}
Consider the Dirichlet problem
for the unit ball $B$.
\begin{alignat*}{2}
\Delta f&=0&&\qquad\text{for $\|x\|<1$}\\
f&=h&&\qquad\text{for $\|x\|=1$}
\end{alignat*}
with $h$ continuous.
If $n\geq 2$ the solution is given by
\[f(x)=\int_{\|y\|=1}P(x,y)h(y)\,dS(y).\]
\end{lemma}
We need a preliminary lemma.
\begin{lemma} (i) If $y$ is fixed with $\|y\|=1$ then
$P(x,y)$
is harmonic in $x$ for $\|x\|<1$.
(ii) ${\displaystyle \int_{\|y\|=1}P(x,y)\,dS(y)=1}$
\end{lemma}
Dilation and translation gives the following result.
\begin{lemma}\label{Ball} The Dirichlet problem for any
ball with continuous data has a unique solution.
\end{lemma}
Lemma~\ref{Ball} is a very useful tool as the proof
of the converse of Lemma~\ref{Gauss mean}~(a) shows.
\begin{lemma}[Proposition 5.7]\label{reverse Gauss}
Let $\Omega$ be open and $u:\Omega\rightarrow{\mathbb R}$
continuous. If the value of $u$ at any point $x\in{\Omega}$
is equal to its
average over any sufficiently small
sphere centred at that $x$ then $u$ is harmonic.
\end{lemma}
\begin{lemma}[Corollary 5.2]\label{near Morera}
The uniform limit of
harmonic functions is itself harmonic.
\end{lemma}
(Compare Morera's theorem in complex variable theory.)
Poisson's solution for the Dirichlet problem for the ball
gives a useful inequality.
\begin{theorem}[Harnack's inequality, Theorem 5.5]
If $f\in C(\overline{B(0,R)})$ is everywhere non-negative
and $f$ is harmonic on $B(0,R)$ then if
$\|x\|=r<R$ then
\[\frac{R^{n-2}(R-r)}{(R+r)^{n-1}}f(0)\leq f(x)
\leq\frac{R^{n-2}(R+r)}{(R-r)^{n-1}}f(0).\]
\end{theorem}
\section{The wave equation}
We write ${\displaystyle \Box=\frac{\partial^{2}\ }{\partial t^{2}}-\Delta}$
for the wave operator on ${\mathbb R}\times{\mathbb R}^{n}$
and $E_{n,+}$ for its forward fundamental solution.
If $n$ is odd with $n\geq 3$ the support of $E_{n,+}$
lies on the surface $\{(t,x)\, :\, \|x\|=t\}$
(but if $n\geq 5$ the form of $E_{n,+}$ is more complicated
than that of $E_{3,+}$, involving analogues
of $\delta_{0}'$, $\delta_{0}''$ and so on).
Notice the contrast with `elliptic regularity'
as described in Theorem~\ref{Weyl}.
If $n$ is even the support of $E_{n,+}$ is spread out
over the whole light cone.
If $n=1$ the matter follows an idea of D'Alembert
familiar from 1B mathematical methods.
\begin{exercise}
Let $n=1$.
(i) Show that if we make the change of coordinates
$w=t+x$, $y=t-x$ the wave operator becomes
${\displaystyle 2\frac{\partial^{2}}{\partial w\partial y}}$.
(ii) Show that the fundamental solution
with support in $\{(w,y)\, :\, w\geq 0, y\geq 0\}$
is $\dfrac{1}{2}H(w)H(y)$.
\end{exercise}
A little thought shows that we have in fact
a very general solution to our forcing problem.
\begin{lemma}\label{forward wave}
Let $A$ be a real number. If $T$
is a distribution with
$\supp T\subseteq\{(t,x)\in{\mathbb R}\times{\mathbb R}^{n}
\, :\, t\geq A\}$ then there exists a unique distribution
$S$ with $\supp S\subseteq\{(t,x)\in{\mathbb R}\times{\mathbb R}^{n}
\, :\, t\geq A\}$ and
\[\Box S=T.\]
We have $S=E_{+}*T$. If $T$ is a $k$ times differentiable
function then so is $S$.
\end{lemma}
Reversing the sign of $t$ we obtain $E_{-}$
the backwards fundamental solution. We have
the analogue of Lemma~\ref{forward wave}.
\begin{lemma} Let $A$ be a real number. If $T$
is a distribution with
$\supp T\subseteq\{(t,x)\in{\mathbb R}\times{\mathbb R}^{n}
\, :\, t\leq A\}$ then there exists a unique distribution
$S$ with $\supp S\subseteq\{(t,x)\in{\mathbb R}\times{\mathbb R}^{n}
\, :\, t\leq A\}$ and
\[\Box S=T.\]
We have $S=E_{-}*T$. If $T$ is a $k$ times differentiable
function then so is $S$.
\end{lemma}
If $T\in{\mathcal D}'$ we can write $T=T_{1}+T_{2}$
with
\[\supp T_{1}\subseteq \{(t,x)\in{\mathbb R}\times{\mathbb R}^{n}
\, :\, t\geq A\}
\ \text{and}
\ \supp T_{2}\subseteq \{(t,x)\in{\mathbb R}\times{\mathbb R}^{n}
\, :\, t\leq B\}\]
for some $A$ and $B$.
\begin{lemma}\label{full wave}
If $T$ is a distribution then we can find a distribution $S$
with
\[\Box S=T.\]
If $T$ is a $k$ times differentiable (respectively smooth)
function then we can choose $S$ to be $k$ times differentiable
(respectively smooth).
\end{lemma}
It is pretty obvious that the decomposition $T=T_{1}+T_{2}$
and the resulting solution $S$ in Lemma~\ref{full wave}
are not unique. A more direct way of seeing this is to
observe that if $u$ is a twice differentiable function
on ${\mathbb R}$ and $\omega\in{\mathbb R}^{n}$ is a vector
of unit length then $\Box u(t-x\cdot\omega)=0$.
\begin{exercise} (i) Verify this.
(ii) Why should you have guessed this from physical
or other considerations?
(iii) Why does the result not contradict
Lemma~\ref{forward wave}?
\end{exercise}
The nature of the lack of uniqueness can be resolved
if we can prove the following theorem.
\begin{theorem}\label{Cauchy 1}
Suppose that
$u_{0},\ u_{1}:{\mathbb R}^{n}\rightarrow{\mathbb R}$
are smooth. Then if $f$ is a continuous function
there is a unique solution of
\[\Box u=f\]
with
\[u(0,x)=u_{0}(x),\ \frac{\partial u}{\partial t}(0,x)=u_{1}(x).\]
\end{theorem}
Clearly it is enough to prove a simpler version.
\begin{theorem}\label{Cauchy 2}
Suppose that
$u_{0},\ u_{1}:{\mathbb R}^{n}\rightarrow{\mathbb R}$
are smooth. Then
there is a unique solution of
\[\Box u=0\]
with
\[u(0,x)=u_{0}(x),\ \frac{\partial u}{\partial t}(0,x)=u_{1}(x).\]
\end{theorem}
It should be noted that if
\[\Box u=0\]
with
\[u(0,x)=u_{0}(x),\ \frac{\partial u}{\partial t}(0,x)=u_{1}(x),\]
then by the definition of $\Box$
\[\frac{\partial^{2} u}{\partial t^{2}}=\Delta u\]
so in particular
\[\frac{\partial^{2} u}{\partial t^{2}}(0,x)=\Delta u_{0}(x).\]
Again
\[\frac{\partial^{3} u}{\partial t^{3}}=
\Delta\frac{\partial u}{\partial t}\]
so
\[\frac{\partial^{3} u}{\partial t^{3}}(0,x)=\Delta u_{1}(x)\]
and so on. Thus
${\displaystyle \frac{\partial^{j} u}{\partial t^{j}}(0,x)}$
is specified to all orders. We shall need a
converse observation.
\begin{lemma}\label{right Taylor}
If $u_{0},\ u_{1}:{\mathbb R}^{n}\rightarrow{\mathbb R}$
are smooth we can
find smooth $u_{j}:{\mathbb R}^{n}\rightarrow{\mathbb R}$
$[j\geq 2]$
such that if
$u:{\mathbb R}\times{\mathbb R}^{n}\rightarrow{\mathbb R}$
is smooth and
\[\frac{\partial^{j} u}{\partial t^{j}}(0,x)=u_{j}(x)\]
for all $j\geq 0$
then
\[\left(\frac{\partial^{k}\ }{\partial t^{k}}\Box u\right)(0,x)=0\]
for all $x$.
\end{lemma}
We also need a result of Borel.
\begin{theorem}[Theorem 6.1, Borel's lemma]\label{Borel}
Given smooth $u_{j}:{\mathbb R}^{n}\rightarrow{\mathbb R}$
we can find a smooth
$u:{\mathbb R}\times{\mathbb R}^{n}\rightarrow{\mathbb R}$
such that
\[\frac{\partial^{j} u}{\partial t^{j}}(0,x)=u_{j}(x)\]
for all $x$ and all $j\geq 0$.
\end{theorem}
Borel's lemma is plausible but not trivial. (The obvious
Taylor series argument fails because Taylor series need
not converge except at the origin.)
The proof of Theorem~\ref{Cauchy 2} is now easy. By
Theorem~\ref{Borel} and Lemma~\ref{right Taylor} we can
find a smooth function
$v:{\mathbb R}\times{\mathbb R}^{n}\rightarrow{\mathbb R}$
such that
\[\left(\frac{\partial^{k}\ }{\partial t^{k}}\Box v\right)(0,x)=0\]
and
\[v(0,x)=u_{0}(x),\ \frac{\partial v}{\partial t}(0,x)=u_{1}(x).\]
If we set $f=\Box v$ then by our choice of $v$
all the partial derivatives of $f$ with respect to $t$
vanish when $t=0$ and the functions
$f_{1},\ f_{2}:{\mathbb R}\times{\mathbb R}^{n}\rightarrow{\mathbb R}$
defined by
\[f_{1}(t,x)=H(t)f(t,x),\ f_{2}(t,x)=(1-H(t))f(t,x)\]
are smooth. Setting
\[u=v-E_{+}*f_{1}-E_{-}*f_{2}\]
we have a solution of the required form. (Since $E_{+}*f_{1}$
is infinitely differentiable with support in $t\geq 0$
all its derivatives must vanish at $t=0$.)
To prove uniqueness, observe that by linearity we need only
prove that if $\Box u=0$ and $u$ and its first partial derivative
in $t$ vanish at $t=0$ then $u=0$.
But if $\Box u=0$ and $u$ and its first partial derivative
in $t$ vanish at $t=0$ then
all the partial derivatives of $u$ with respect to $t$
vanish when $t=0$. We can thus write
\[u=u_{+}+u_{-}\]
with $u_{\pm}$ smooth, supported in $\pm t\geq 0$
and satisfying $\Box u_{\pm}=0$. By Lemma~\ref{forward wave}
$u_{+}=0$. Similarly $u_{-}=0$ and we are done.
\section{The heat equation}
\subsection{Existence}
Recall that the heat operator
$J$ is given by
\[Ju(t,x)=\left(\frac{\partial\ }{\partial t}
-\Delta\right)u(t,x)\]
where $u:{\mathbb R}\times{\mathbb R}^{n}\rightarrow{\mathbb R}$.
\begin{lemma} A fundamental solution of the heat
equation is given by
\begin{alignat*}{2}
K(t,x)&=\left(\frac{1}{4\pi t}\right)^{n/2}
\exp(-\dfrac{\|x\|^{2}}{4t})&&\qquad\text{if $t>0$,}\\
K(t,x)&=0&&\qquad\text{if $t\leq 0$.}
\end{alignat*}
\end{lemma}
Note that we do not find a `backward fundamental solution' --
the heat equation has an arrow of time built in.
Since the only singularity of our fundamental solution
is at $(0,0)$ we can build a parametrix and apply the argument of
Theorem~\ref{Weyl} to obtain a corresponding result.
\begin{lemma}[Proposition 7.1]
If $u\in{\mathcal D}'$ and $J$ is the heat
operator then $u$ and $Ju$ have the same singular
support. In particular if $Ju$ is smooth so is $u$.
\end{lemma}
This property is sometimes called \emph{hypo-ellipticity}
--- saying that although the operator is not
elliptic it behaves like an elliptic
operator in this respect.
We prove the next result by direct verification.
\begin{theorem}\label{heat exists}
If $u_{0}:{\mathbb R}^{n}\rightarrow{\mathbb R}$
and $g:{\mathbb R}\times{\mathbb R}^{n}\rightarrow{\mathbb R}$
are continuous functions then the system
\[\left(\frac{\partial\ }{\partial t}
-\Delta\right)u(t,x)=g(t,x)\ \text{for $t>0$}\]
subject to
\[\lim_{t\rightarrow 0+}u(t,x)=u_{0}(x)\]
has a solution
\[u(t,x)=K_{t}*u_{0}+\int_{0}^{t}K_{t-s}*g_{s}\,ds\]
where $K_{t}(x)=K(t,x)$ and $g_{s}(x)=g(s,x)$.
\end{theorem}
If $g=0$ we observe that $u(t,x)$ is a smooth function of $x$
for fixed $t>0$ whatever the choice of $u_{0}$;
this is another indication of the arrow of time.
\subsection{Uniqueness} We did not claim uniqueness
in Theorem~\ref{heat exists} because the
infinite propagation speed of heat exhibited by our solution
allows other (non-physical) solutions.
\begin{exercise} (This is included for interest, it
is non-examinable.) We work with $n=1$ for simplicity.
(i) Show formally (without worrying about rigour) that
if $g$ is smooth
\[u(t,x)=\sum_{k=0}^{\infty}\frac{g^{(k)}(t)x^{2k}}{(2k)!}\]
satisfies $Ju=0$.
(ii) If $g^{(k)}(0)=0$ for all $k$ but $g$ is not identically
zero then $u$ is a solution of $Ju=0$ with $u(t,x)\rightarrow 0$
as $t\rightarrow 0+$ but $u$ is not the zero function.
(iii) We now need to choose $g$ and make the argument rigorous.
It is up to the reader to choose whether to try this.
My own preference would be to manufacture $g$ by hand
but $g(t)=\exp(-1/t^{2})$ $[t\neq 0]$, $g(0)=0$
will do (see~\cite{John}, Section~7.1).
\end{exercise}
Because parabolic equations lie `on the boundary
of elliptic equations' we try and
use ideas about maxima of solutions similar to
those of Theorems~\ref{weak maximum}
and~\ref{Second Dirichlet unique}.
\begin{lemma} Consider the bounded open set
\[\Omega=\{(t,x)\, :\, \|x-y\|<r,\ 0<t<T\}.\]
If $u$ is continuous on $\bar{\Omega}$ and satisfies
$Ju=0$ in $\Omega$ then $u$ attains its maximum
on the part of $\partial\Omega$ where $t=0$ or
$\|x-y\|=r$.
\end{lemma}
This parabolic maximum principle yields uniqueness
within the class of bounded solutions.
\begin{theorem}[Theorem 7.2]
If $u_{0}:{\mathbb R}^{n}\rightarrow{\mathbb R}$
and $g:{\mathbb R}\times{\mathbb R}^{n}\rightarrow{\mathbb R}$
are bounded continuous functions then, within the
class of bounded functions, the system
\[\left(\frac{\partial\ }{\partial t}
-\Delta\right)u(t,x)=g(t,x)\ \text{for $t>0$}\]
subject to
\[\lim_{t\rightarrow 0+}u(t,x)=u_{0}(x)\]
has exactly one solution.
\end{theorem}
We also have a result on continuous dependence on initial data.
\begin{exercise}\label{heat data}
Suppose $u_{1},\ u_{2}:{\mathbb R}^{n}\rightarrow{\mathbb R}$
and $g:{\mathbb R}\times{\mathbb R}^{n}\rightarrow{\mathbb R}$
are bounded continuous functions.
If $\tilde{u}_{j}$ is the bounded solution
of the system
\[\left(\frac{\partial\ }{\partial t}
-\Delta\right)\tilde{u}_{j}(t,x)=g(t,x)\ \text{for $t>0$}\]
subject to
\[\lim_{t\rightarrow 0+}\tilde{u}_{j}(t,x)=u_{j}(x)\]
for $j=1,\ 2$. Then
\[\sup_{t\geq 0,\ x\in{\mathbb R}^{n}}
|\tilde{u}_{2}(t,x)-\tilde{u}_{1}(t,x)|
=\sup_{x\in{\mathbb R}^{n}}
|u_{2}(x)-u_{1}(x)|.\]
\end{exercise}
\section{References} The reader
will be in no doubt of my admiration for Friedlander's
little book~\cite{Friedlander}. The additions to
the second edition do not concern this course
but some niggling misprints have been removed
and you should tell your college library to buy
the second edition even if it already has the first.
Friedlander's book mainly concerns the `distributional'
side of the course. Specific partial differential
equations are dealt with along with much else
in Folland's \emph{Introduction to Partial
Differential Equations}~\cite{Folland}. The two books
\cite{Friedlander}~and~\cite{Folland} are also
excellent further reading. The book~\cite{John}
is a classic by a major worker in the field.
\begin{thebibliography}{9}
\bibitem{Folland} G.~B.~Folland
\emph{Introduction to Partial Differential Equations}
2nd Edition, Princeton University Press, 1995. QA324
\bibitem{Friedlander}F.~G.~Friedlander and M.~Joshi
\emph{Introduction to the Theory of Distributions}
2nd Edition, CUP, 1998. QA324
\bibitem{John} F.~John
\emph{Partial Differential Equations} 4th Edition,
Springer-Verlag, 1982 QA1.A647
\end{thebibliography}
\end{document}