% -*- compile-command: "rubber -d --unsafe hw-08-dynamic-programming-2.tex" -*-
\documentclass[11pt]{tufte-handout}
\usepackage[num=8,date=November\ 1,topic=Dynamic\ programming\ II]{../algorithms-hw}
\begin{document}
\coversheet
% \begin{question}[Micah's cafeteria problem]
% The Yankees have made it to the World Series against your favorite
% team, the Houston Astros. The World Series is a best of 7 series,
% which means that the first team to win 4 total games is declared the
% winner. Thus, the series can be as short as 4 games or as long as 7
% games. As an amateur gambler, you plan to place bets on each of the
% games in the series. Unfortunately, your gambling exploits from the
% Academy Awards have left you with only \$100 in your pocket.
% \marginnote{\emph{Note}: There is no probability involved in this
% question---your strategy is based purely on the wins and losses of
% the two teams in the series.} While your love for the Astros is
% unbounded, so too is your enmity for the Yankees. This acrimony has
% led you to the following decision: {\em If the Yankees win, you want
% to lose all \$100, but if the Astros win, you want to double your
% money.} What should your strategy be? In particular, how much
% money should you bet on the first game?
% \begin{enumerate}[label=(\alph*)]
% \item Start by letting $p(i,j)$ be your current winnings or
% losings (that is, the amount gained or lost relative to your
% starting \$100) when the Astros have $i$ wins and the Yankees have
% $j$ wins. For example, $p(4,1)=100$, because if the Astros win, you
% should win \$100, while $p(1,4)=-100$ since if the Yankees win you
% should lose \$100. In general, what are the base cases for $p$?
% \item Write a recursive definition for $p(i,j)$.
% \item Now, $p(0,0)$ should be 0, so $p(1,0)$ should reveal your
% bet. What is it?
% \end{enumerate}
% \end{question}
\begin{question} \label{q:subset} This problem is similar to one we
did in class, but slightly more general. Suppose we have a set
$\{x_1, x_2, \dots, x_n\}$ of $n$ positive integers, and a target
sum $S$, which is a positive integer. In class, we considered
finding a subset of $\{x_1, \dots, x_n\}$ whose sum is \emph{exactly
equal to} $S$. Instead, let's now consider finding a subset whose
sum is \emph{as close to $S$ as possible without going over}; that
is, the subset with the largest possible sum $\leq S$. For example,
given the set $\{1,2,7,12\}$ and the target sum $18$, there is no
subset which sums exactly to $18$, but the one which comes closest
without going over is $\{12,2,1\}$ which sums to $15$. Note that
$\{12,7\}$, with a sum of $19$, is closer to the target in an
absolute sense, but it is greater than the target; we are interested
in the biggest sum which is $\leq$ the target.
\begin{enumerate}[label=(\alph*)]
\item Describe a simple brute-force algorithm for solving this
problem. What is the running time of the algorithm?
\item \marginnote{\emph{\ref{q:subset}}\quad\qrcode{Let\ $m[k,s]$\ be\
the\ best\ possible\ sum\ using\ only\ $x_1$\ through\ $x_k$\ which\ is\ at\
most\ $s$.}}
Using dynamic programming, describe an algorithm with running time $\Theta(nS)$.
Be sure that you explain how to find not only the maximum possible
sum, but also the actual subset which has that sum. Justify the
correctness of your algorithm.
\item This is known as a \emph{psuedopolynomial-time} algorithm: the
running time is a polynomial in the \emph{value} of $S$, but
actually exponential in terms of the \emph{size} of the input
(\emph{i.e.} the number of bits needed to represent $S$).
Give one example of a set of inputs for which your dynamic programming
solution would be faster, and one example of a set of inputs for
which the brute force algorithm would be faster.
\end{enumerate}
\end{question}
\begin{question}
Consider the problem of making change for $C$ cents using the fewest
possible number of coins. Assume that each coin's value is an
integer.
\begin{enumerate}[label=(\alph*)]
\item Describe a greedy algorithm to make change for $C$ cents using
US quarters, dimes, nickels, and pennies.\marginnote{It turns out
that the greedy algorithm is actually optimal for US coins (and
most real-world coin systems), though coming up with a proof of
this fact is nontrivial.}
\item Give a set of coin denominations for which the greedy
algorithm does not yield an optimal solution. Your set should
include a penny so that there is a solution for every value of $C$.
\item Design and analyze an algorithm to make change using the fewest
number of coins that works for any set of coins. That is, as input
your algorithm should take
\begin{itemize}
\item $n$, the number of different coin types;
\item a list $c_1$, $c_2$, \dots, $c_n$ giving the values of the
different coins (you may assume they are already sorted from
smallest to largest); and
\item the number of cents $C$ we would like to make change for.
\end{itemize}
As output your algorithm should either report that it is not
possible to make the required amount $C$ using the given coins, or
give a multiset\footnote{A multiset is like a set that allows
duplicate elements.} of coins which add up to $C$ such that the
number of coins in the multiset is as small as possible. For
example, if given as input $c_1 = 1$, $c_2 = 5$, $c_3 = 20$ and the
target value $C = 47$, your algorithm should output
$\{20, 20, 5, 1, 1\}$. Note that we assume there is an unlimited
supply of coins of each type. Be sure to justify your algorithm's
correctness and analyze its time complexity.
\end{enumerate}
\end{question}
\begin{question}[Derived from K\&T 6.6] \label{q:format}
In a word processor, the goal of loose justification is to take text with a ragged right margin, like this,
\begin{verbatim}
Call me Ishmael.
Some years ago,
never mind how long precisely,
having little or no money in my purse,
and nothing particular to interest me on shore,
I thought I would sail about a little
and see the watery part of the world.
\end{verbatim}
and turn it into text whose right margin is ``as even as possible'',
like this:
\begin{verbatim}
Call me Ishmael. Some years ago, never
mind how long precisely, having little
or no money in my purse, and nothing
particular to interest me on shore, I
thought I would sail about a little
and see the watery part of the world.
\end{verbatim}
To make this precise enough for us to start thinking about how to
write a justifier for text, we need to figure out what it means for
the right margins to be ``even''. Suppose our text consists of a
sequence of {\em words}, $W=\{w_{1}, w_{2}, \ldots, w_{n}\}$ where
$w_{i}$ consists of $c_{i}$ characters. We have a maximum line length
of $L$. We will assume we have a fixed-width font, so we just need to
make sure that the number of characters on each line is no more than
$L$.
A {\em formatting of $W$} consists of a partition of the words in $W$ into {\em lines}. In the words assigned to a single line, there should be a space after each word except the last; and so if $ w_{j}, w_{j+1}, \ldots, w_{k}$ are assigned to one line, then we should have
\[
c_{k} + \sum_{j \leq i < k}(c_{i} + 1) \leq L.
\]
We will call an assignment of words to a line {\em valid} if it
satisfies this inequality. The difference between the left-hand side
and the right-hand side will be called the {\em slack} of the
line---that is, the number of spaces remaining at the right margin.
For example, suppose $L = 10$. Then
\begin{verbatim}
Call me Ishmael.
\end{verbatim}
is not valid, since it has length $(4+1) + (2+1) + 8$ which is greater
than $10$. On the other hand,
\begin{verbatim}
Call me
\end{verbatim}
is valid, and has a slack of $3$, since it has length only $7$,
leaving $3$ remaining spaces at the end.
We will say that a formatting is optimal when the sum of the {\em
squares} of the slacks of all lines (including the last line) is
minimized.
\begin{enumerate}[label=(\alph*)]
\item Describe a greedy algorithm to find a formatting of a list of
words, and give an example where your greedy algorithm does
\emph{not} produce an optimal solution.
\item Using dynamic programming,
\marginnote[-1.5in]{\emph{\ref{q:format}}\quad\qrcode{Let\ score[i]\
denote\ the\ best\ score\ achievable\ using\ only\ the\ first\
i\ words.}\vspace{1em} \\ Medium hint}\marginnote{\emph{\ref{q:format}}\quad\qrcode{To\
compute\ score[k],\ focus\ on\ the\ last\ line.\ First\ try\
putting\ just\ one\ word\ on\ the\ last\ line,\ then\ try\
two\ words,\ then\ three,\ ...\ and\ pick\ the\ best\ resulting\
score.}\vspace{1em} \\ Big hint}design and analyze an efficient algorithm to find an
optimal formatting of a set of words $W$ into valid lines for a
given line length $L$. (As usual, ``analyze'' means to prove it is
correct, and analyze its asymptotic running time.)
\item Why did we use the sum of the {\em squares} instead of just,
say, the sum? That is, what sort of bias does this optimization
function create?
\end{enumerate}
\end{question}
\begin{question}
(\textbf{Optional}/\textbf{Extra Credit})
If you want some practice actually implementing a dynamic
programming solution to a problem, write a program that implements
your algorithm from Question~\ref{q:format}. Your program should
take two command-line arguments: (1) an integer representing the
maximum line length; and (2) a file name. It should then output a
justified version of the file to {\tt stdout} using the algorithm
above.
For example, suppose \verb|lorem.txt| contains the text:
\begin{verbatim}
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quisque
rhoncus interdum odio, mattis finibus eros imperdiet non. Praesent egestas lectus.
\end{verbatim}
Then running your program with the arguments \verb|25| and
\verb|lorem.txt| should print
\begin{verbatim}
Lorem ipsum dolor sit
amet, consectetur
adipiscing elit. Quisque
rhoncus interdum
odio, mattis finibus
eros imperdiet non.
Praesent egestas lectus.
\end{verbatim}
which is an optimal formatting of the text into lines of length at
most $25$.
% If you want to use Python, I have prepared a skeleton program from
% which you can start, available on the course website. However, you
% may use any programming language you wish.
% \marginnote{Ask me if you
% want some ideas on how to write it in Haskell, which has a really
% slick way to express dynamic programming algorithms using lazy,
% recursively defined arrays.}
Also available is a file
\verb|neruda.tar| which contains a Pablo Neruda poem together with
two outputs: \verb|neruda.50.out| and \verb|neruda.30.out| are the
results of running my solution on \verb|neruda| using a line length
of $50$ and $30$, respectively. Note that in both cases, there are
multiple correct solutions with the same minimum score. Your program
may not produce exactly the same output as mine, but you should
ensure that it produces a solution with the same score.
% You should submit your program on Moodle. If necessary, you should
% also submit a file \verb|README.txt| with instructions explaining
% how to compile and run your program.
\end{question}
\end{document}