% 01recap.tex

% !TeX document-id = {f19fb972-db1f-447e-9d78-531139c30778}
% !BIB program = biber
\documentclass[compress]{beamer}
\usepackage[T1]{fontenc}
\usepackage{pifont}
\usetheme[block=fill,subsectionpage=progressbar,sectionpage=progressbar]{metropolis} 

\usepackage{wasysym}
\usepackage{etoolbox}
\usepackage[utf8]{inputenc}

\usepackage{threeparttable}
\usepackage{subcaption}

\usepackage{tikz-qtree}
% Overlay handling: items that are still covered are drawn at 5% opaqueness
% (faintly visible); items that get covered again are drawn at 100% opaqueness.
\setbeamercovered{still covered={\opaqueness<1->{5}},again covered={\opaqueness<1->{100}}}


\usepackage{listings}

% Default appearance of code listings: numbered and wrapped lines,
% \scriptsize typewriter font, light-blue background.
\lstset{%
	numbers=left,
	numberstyle=\tiny,
	%stepsize=1,
	breaklines=true,
	columns=flexible,
	basicstyle=\scriptsize\ttfamily,
	backgroundcolor=\color[rgb]{0.85,0.90,1}%
}


% Listing environment for program output: numbered and wrapped lines,
% \footnotesize typewriter font, gray background.
\lstnewenvironment{lstlistingoutput}{%
	\lstset{%
		numbers=left,
		numberstyle=\tiny,
		%stepsize=1,
		breaklines=true,
		columns=flexible,
		basicstyle=\footnotesize\ttfamily,
		backgroundcolor=\color[rgb]{.7,.7,.7}%
	}%
}{}


% Listing environment for long program output: numbered and wrapped lines,
% \tiny typewriter font, gray background.
\lstnewenvironment{lstlistingoutputtiny}{%
	\lstset{%
		numbers=left,
		numberstyle=\tiny,
		%stepsize=1,
		breaklines=true,
		columns=flexible,
		basicstyle=\tiny\ttfamily,
		backgroundcolor=\color[rgb]{.7,.7,.7}%
	}%
}{}


\usepackage[american]{babel}
\usepackage{csquotes}
\usepackage[style=apa, backend = biber]{biblatex}
\DeclareLanguageMapping{american}{american-UoN}
\addbibresource{../../bdaca.bib}
\renewcommand*{\bibfont}{\tiny}

\usepackage{tikz}
\usetikzlibrary{shapes,arrows,matrix}
\usepackage{multicol}

\usepackage{subcaption}

\usepackage{booktabs}
\usepackage{graphicx}

\graphicspath{{../bdaca/pictures/}}

% Custom headline: a navigation bar framed by an upper and a lower separation line.
% NOTE(review): no @-commands are used below, so \makeatletter/\makeatother
% look unnecessary -- confirm before removing.
\makeatletter
\setbeamertemplate{headline}{%
	% upper separation line (empty colour box with 1.5pt colour separation)
	\begin{beamercolorbox}[colsep=1.5pt]{upper separation line head}
	\end{beamercolorbox}
	% navigation bar spanning the paper width, with a 2pt skip above and below
	\begin{beamercolorbox}{section in head/foot}
		\vskip2pt\insertnavigation{\paperwidth}\vskip2pt
	\end{beamercolorbox}%
	% lower separation line (empty colour box with 1.5pt colour separation)
	\begin{beamercolorbox}[colsep=1.5pt]{lower separation line head}
	\end{beamercolorbox}
}
\makeatother


% Colours of the "section in head/foot" element (used by the headline's navigation bar):
% foreground = background colour of normal text, background = foreground colour of structure.
\setbeamercolor{section in head/foot}{fg=normal text.bg, bg=structure.fg}


% \question{<text>}: typesets a plain frame of its own that shows the picture
% mannetje.png in a narrow left column (30% of the text width) and <text> in
% large, bold, emphasized orange type in the right column (70%).
% The % signs at the line ends keep the line breaks from becoming space tokens.
% In particular, a space right after the opening brace of \makebox would be
% typeset next to the image and push it off-centre inside the box.
\newcommand{\question}[1]{%
	\begin{frame}[plain]
		\begin{columns}
			\column{.3\textwidth}
			\makebox[\columnwidth]{%
				\includegraphics[width=\columnwidth,height=\paperheight,keepaspectratio]{mannetje.png}}
			\column{.7\textwidth}
			\large
			\textcolor{orange}{\textbf{\emph{#1}}}
		\end{columns}
	\end{frame}%
}

% \instruction{<text>}: a cue for the presenter (e.g. "show in Notebook"),
% printed in gray, emphasized, and wrapped in square brackets.
\newcommand{\instruction}[1]{%
	\emph{\textcolor{gray}{[#1]}}%
}


\title{Beyond Counting Words: Working with Word Embeddings}
\author[Damian Trilling]{Damian Trilling \\ ~ \\ \footnotesize{d.c.trilling@uva.nl \\@damian0604} \\ \url{www.damiantrilling.net}}
\date{12--13 April 2021}
\institute[UvA]{Afdeling Communicatiewetenschap \\Universiteit van Amsterdam}

\begin{document}

\begin{frame}{}
	\titlepage
\end{frame}

\begin{frame}{This part: Recap ``Working with textual data in Python''}
	\tableofcontents
\end{frame}


\setbeamercovered{transparent}

\section{Getting to know each other}


\begin{frame}{Damian}
	
	\begin{columns}
		\column{.3\textwidth}
		\makebox[\columnwidth]{
			\includegraphics[width=\columnwidth,height=\paperheight,keepaspectratio]{damian.jpg}}
		\column{.7\textwidth}
		dr. Damian Trilling \\
		Universitair Hoofddocent (Associate Professor) Communication in the Digital Society \\
		\begin{itemize}
			\item studied Communication Science in M\"unster and at the VU 2003--2009
			\item PhD candidate @ ASCoR 2009--2012
			\item political communication and journalism in a changing media environment
			\item computational research methods
		\end{itemize}
		@damian0604 ~~ d.c.trilling@uva.nl ~~ REC-C~8\textsuperscript{th}~floor ~~ \url{www.damiantrilling.net} 
	\end{columns}
\end{frame}


\question{Who are you, and what do you hope to get out of this workshop?}


\section[Pandas vs native]{Ways of working with data in Python}

\begin{frame}{Pandas}
Main data structure: the ``dataframe''
\begin{itemize}
	\item familiar; similar to R/SPSS/Stata
	\item great built-in methods for data wrangling
	\item we can easily apply operations to whole columns
\end{itemize}

\instruction{show in Notebook}

\end{frame}

\begin{frame}{Pandas}
However, 
\begin{itemize}[<+->]
	\item your dataset may be too large to keep in memory;
	\item it may not make sense to think of your data as a table;
	\item you may not be interested in statistical calculations \emph{within} your dataframe (e.g., regress some column on some others).
\end{itemize}
\pause 
$\Rightarrow$ Collections of texts that we want to use for Machine Learning are not necessarily something we want to keep in a table
\end{frame}


\begin{frame}{Native data types}
	\begin{block}{Lists and dictionaries}
		\begin{description}
			\item[{\color{red}list}]<2-> \texttt{firstnames = ['Alice','Bob','Cecile'] \\
				 lastnames = ['Garcia','Lee','Miller']}
			\item[{\color{red}list}]<3->\texttt{ages = [18,22,45]}
			\item[{\color{red}dict}]<4-> \texttt{agedict = \{'Alice': 18, 'Bob': 22, 'Cecile': 45\} }
		\end{description}
		\onslide<5->{
		Note that the elements of a list, the keys of a dict, and the values of a dict can have any* datatype! (You can even mix them, but it's better to be consistent!)
		
		\tiny{*Well, keys cannot be mutable $\rightarrow$ see book}}
	\end{block}
\end{frame}


\begin{frame}{Native datatypes}
	\begin{block}{Retrieving specific items}
		\begin{description}
			\item[{\color{red}list}]<1-> \texttt{firstnames[0]} gives you the first entry\\ 
				\texttt{firstnames[-2]} gives you the second-to-last entry\\
				\texttt{firstnames[:2]} gives you entries 0 and 1\\
				\texttt{firstnames[1:3]} gives you entries 1 and 2\\
				\texttt{firstnames[1:]} gives you entries 1 until the end\\
			\item[{\color{red}dict}]<2-> \texttt{agedict["Alice"]} gives you 18 
		\end{description}
	
	\end{block}
\end{frame}


\question{Think of at least two different ways of storing data about some fictitious persons (first name, last name, age, phone number, \ldots) using lists and/or dictionaries. What are the pros and cons?}


\section{Functions and methods}
\begin{frame}{Python lingo}
	\begin{block}{Functions}
		\begin{description}
			\item[{\color{red}functions}]<2-> Take an input and return something else \\ \texttt{int(32.43)} returns the integer 32. \texttt{len("Hello")} returns the integer 5.\\ 
			\item[{\color{red}methods}]<3-> are similar to functions, but directly associated with an object. \texttt{"SCREAM".lower()} returns the string \texttt{"scream"}
		\end{description}
	\end{block}
	\onslide<4->{Both functions and methods end with \texttt{()}. Between the \texttt{()}, \emph{arguments} can (sometimes have to) be supplied.}
\end{frame}


\begin{frame}[fragile]{Some functions}
\begin{lstlisting}
len(x)        # returns the length of x
y = len(x)    # assign the value returned by len(x) to y
print(len(x)) # print the value returned by len(x)
print(y)      # print y
int(x)        # convert x to an integer
str(x)        # convert x to a string
sum(x)        # get the sum of x
\end{lstlisting}
\end{frame}

\question{How could you print the mean (average) of a list of integers using the functions on the previous slide?}


\begin{frame}[fragile]{Some methods}
Some string methods
\begin{lstlisting}
mystring = "Hi! How are you?"
mystring.lower()   # return lowercased string (doesn't change original!)
mylowercasedstring =  mystring.lower()  # save to a new variable
mystring =  mystring.lower()  # or override the old one
mystring.upper()   # uppercase
mystring.split()   # Splits on spaces and returns a list ['Hi!', 'How', 'are', 'you?']
\end{lstlisting}

We'll look into some list methods later.

\textbf{$\Rightarrow$ You can use TAB-completion in Jupyter to see all methods (and properties) of an object!}
\end{frame}


\begin{frame}[fragile]{Writing own functions}
You can write your own function:
\begin{lstlisting}
def addone(x):
    y = x + 1
    return y
\end{lstlisting}

Functions take some input (``argument'') (in this example, we called it \texttt{x}) and \emph{return} some result.

Thus, running
\begin{lstlisting}	
addone(5)
\end{lstlisting}
returns \texttt{6}.
\end{frame}

\begin{frame}[fragile]{Writing own functions}

\begin{alertblock}{Attention, R users! (maybe obvious for others?)}
	You \emph{cannot}* apply the function that we just created on a whole list -- after all, it takes an int, not a list as input.
\end{alertblock}

(Wait until we cover for loops later today, but this is how you'd do it: by calling the function for each element in the list separately.)

\begin{lstlisting}
mynumbers = [5, 3, 2, 4]
results = [addone(e) for e in mynumbers]
\end{lstlisting}

\vspace{1cm}
\tiny{* Technically speaking, you could do this by wrapping the \texttt{map} function around your own function, but that's not considered ``pythonic''. Don't do it ;-) \\}
\end{frame}


\section[Modifying lists \& dicts]{Modifying lists and dictionaries}
\begin{frame}[fragile]{Modifying lists}
	Let's use one of our first \textcolor{red}{method}s! Each \emph{list} has a method \texttt{.append()}:
\begin{block}{Appending to a list}
\begin{lstlisting}
mijnlijst = ["element 1", "element 2"]
anotherone = "element 3"   # note that this is a string, not a list!
mijnlijst.append(anotherone)
print(mijnlijst)
\end{lstlisting}
gives you:
\begin{lstlisting}
["element 1", "element 2", "element 3"]
\end{lstlisting}
\end{block}
\end{frame}


\begin{frame}[fragile]{Modifying lists}
\begin{block}{Merging two lists (= extending)}
\begin{lstlisting}
mijnlijst = ["element 1", "element 2"]
anotherone = ["element 3", "element 4"]
mijnlijst.extend(anotherone)
print(mijnlijst)
\end{lstlisting}
gives you:
\begin{lstlisting}
["element 1", "element 2", "element 3", "element 4"]
\end{lstlisting}
\end{block}
\end{frame}

\question{What would have happened if we had used \texttt{.append()} instead of \texttt{.extend()}?}

\question{Why do you think that the Python developers implemented \texttt{.append()} and \texttt{.extend()} as methods of a list and not as functions?}


\begin{frame}[fragile]{Modifying dicts}
\begin{block}{Adding a key to a dict (or changing the value of an existing key)}
\begin{lstlisting}
mydict = {"whatever": 42, "something": 11}
mydict["somethingelse"] = 76
print(mydict)
\end{lstlisting}
gives you:
\begin{lstlisting}
{'whatever': 42, 'something': 11, 'somethingelse': 76}
\end{lstlisting}
If a key already exists, its value is simply replaced.
\end{block}
\end{frame}


\section{for, if/elif/else, try/except}

\begin{frame}[fragile]{How can we structure our program?}
If we want to \emph{repeat} a block of code, execute a block of code only \emph{under specific conditions}, or more generally want to structure our code, we use \emph{indention}.

	\begin{block}{Indention: The Python way of structuring your program}
		\begin{itemize}
			\item Your program is structured by TABs or SPACEs.
			\item Jupyter (or your IDE) handles (guesses) this for you, but make sure not to interfere and not to mix TABs and SPACEs!
			\item Default: four spaces per level of indention.
		\end{itemize}
	\end{block}
\end{frame}


\begin{frame}[fragile]{Indention}
\begin{block}{Structure}
	A first example of an indented block -- in this case, we want to \emph{repeat} this block:
\end{block}
\begin{lstlisting}
agedict = {'Zeus': None, 'Denis': 96, 'Alice': 18, 'Rebecca': 20 , 'Bob': 22, 'Cecile': 45}

myfriends = ['Alice','Bob','Cecile']

print ("The names and ages of my friends:")
for buddy in myfriends:
    print (f"My friend {buddy} is {agedict[buddy]} years old")
\end{lstlisting}

Output:
\begin{lstlisting}
The names and ages of my friends:
My friend Alice is 18 years old
My friend Bob is 22 years old
My friend Cecile is 45 years old
\end{lstlisting}
\end{frame}

\begin{frame}[fragile]{What happened here?}

\begin{lstlisting}
for buddy in myfriends:
    print (f"My friend {buddy} is {agedict[buddy]} years old")
\end{lstlisting}

\small
	\begin{block}{The for loop}
\begin{enumerate}
	\item Take the first element from \texttt{myfriends} and call it \texttt{buddy} (like \texttt{buddy = myfriends[0]}) (line 1)
	\item Execute the indented block (line 2, but could be more lines)
	\item Go back to line 1, take next element  (like \texttt{buddy = myfriends[1]}) 
	\item Execute the indented block \ldots
	\item \ldots repeat until no elements are left \ldots
\end{enumerate}
	\end{block}
	
	\begin{block}{The f-string (\emph{formatted} string)}
If you prepend a string with an \texttt{f}, you can use curly brackets \texttt{\{\}} to insert the value of a variable
	\end{block}
	
\end{frame}


\begin{frame}[fragile]{What happened here?}
\begin{lstlisting}
for buddy in myfriends:
    print (f"My friend {buddy} is {agedict[buddy]} years old")
\end{lstlisting}
\small
The line \emph{before} an indented block starts with a \emph{statement} indicating what should be done with the block and ends with a \texttt{:}

\footnotesize
\begin{block}{More in general, the \texttt{:} with indention indicates that}<2->
	\begin{itemize}
		\item<3-> the block is to be executed repeatedly (\texttt{for} statement) – e.g., for each element from a list, or until a condition is reached (\texttt{while} statement)
		\item<4-> the block is only to be executed under specific conditions (\texttt{if}, \texttt{elif}, and \texttt{else} statements)
		\item<5-> an alternative block should be executed if an error occurs in the block (\texttt{try} and \texttt{except} statements)
		\item<6-> a file is opened, but should be closed again after the block has been executed (\texttt{with} statement)
	\end{itemize}
\end{block}
\end{frame}


\begin{frame}[fragile]{Can we also loop over dicts?}
Sure! But we need to indicate how exactly:

\begin{lstlisting}
mydict = {"A":100, "B": 60, "C": 30}

for k in mydict:   # or mydict.keys()
    print(k)

for v in mydict.values():
    print(v)

for k,v in mydict.items():
    print(f"{k} has the value {v}")
\end{lstlisting}

\end{frame}


\begin{frame}[fragile]{Can we also loop over dicts?}
The result:

\begin{lstlisting}
A
B
C

100
60
30

A has the value 100
B has the value 60
C has the value 30
\end{lstlisting}

\end{frame}


\begin{frame}[fragile]{if statements}
	\begin{block}{Structure}
		Only execute block if condition is met
	\end{block}
	\begin{lstlisting}
x = 5
if x <10:
   print(f"{x} is smaller than 10")
elif x > 20:
   print(f"{x} is greater than 20")
else:
   print(f"No previous condition is met, therefore 10<={x}<=20")
\end{lstlisting}

\end{frame}


\question{Can you see how such an if statement could be particularly useful when nested in a for loop?}


\begin{frame}[fragile]{try/except}
\begin{block}{Structure}
If executed block fails, run another block instead
\end{block}
\begin{lstlisting}
x = "5"
try: 
   myint = int(x)
except:
   myint = 0
\end{lstlisting}

\pause 
\small{Again, more useful when executed repeatedly (in a loop or function):}
\begin{lstlisting}
mylist = ["5", 3, "whatever", 2.2]
myresults = []
for x in mylist:
    try: 
        myresults.append(int(x))
    except:
        myresults.append(None)
print(myresults)
\end{lstlisting}
\end{frame}


\begin{frame}[fragile]{List comprehensions}
\begin{block}{Structure}
A for loop that \texttt{.append()}s to an empty list can be replaced by a one-liner:
\end{block}
\begin{lstlisting}
mynumbers = [2,1,6,5]
mysquarednumbers = []
for x in mynumbers:
    mysquarednumbers.append(x**2)
\end{lstlisting}
is equivalent to:
\begin{lstlisting}
mynumbers = [2,1,6,5]
mysquarednumbers = [x**2 for x in mynumbers]
\end{lstlisting}

\pause 
Optionally, we can have a condition:
\begin{lstlisting}
mynumbers = [2,1,6,5]
mysquarednumbers = [x**2 for x in mynumbers if x>3]
\end{lstlisting}

\end{frame}


\begin{frame}[fragile]{List comprehensions}
	\begin{alertblock}{A very pythonic construct}
		\begin{itemize}
			\item Every list comprehension can also be written as a for loop that appends to a new list to collect the results. 
			\item For very complex operations (e.g., nested for loops), it can be easier to write out the full loops. 
			\item But mostly, list comprehensions are really great! (and much more concise!)
		\end{itemize}
	\end{alertblock}
\textbf{$\Rightarrow$ You really should learn this!}
	\end{frame}


\begin{frame}[fragile]{Generators}
\begin{block}{Structure}
	A lazy for loop (or function) that only generates its next element when it is needed:
\end{block}
\footnotesize
You can create a generator just like a list comprehension (but with \texttt{()} instead of \texttt{[]}):
\begin{lstlisting}
mynumbers = [2,1,6,5]
squaregen = (x**2 for x in mynumbers)   # these are NOT calculated yet
for e in squaregen:
    print(e)             # only here, we are calculating the NEXT item
\end{lstlisting}
\pause
Or like a function (but with \texttt{yield} instead of \texttt{return}):
\begin{lstlisting}
def squaregen(listofnumbers):
    for x in listofnumbers:
        yield(x**2)
mygen = squaregen(mynumbers)
for e in mygen:
    print(e)
\end{lstlisting}

\end{frame}


\begin{frame}[fragile]{Generators}
	\begin{alertblock}{A very memory and time efficient construct}
		\begin{itemize}
			\item Every function that \emph{returns} a list can also be written as a generator that \emph{yields} the elements of the list
			\item Especially useful if
			\begin{itemize}
				\item it takes a long time to calculate the list
				\item the list is very large and uses a lot of memory (hi big data!)
				\item the elements in the list are fetched from a slow source (a file, a network connection)
				\item you don't know whether you actually will need all elements
			\end{itemize}
		\end{itemize}

		\end{alertblock}

		\end{frame}


\section{Some examples of working with texts}


\begin{frame}{Counting words}
\begin{enumerate}
	\item Split text into words (``tokenization'')
	\item Count words
\end{enumerate}
\end{frame}

\begin{frame}[fragile]{Counting words}
\begin{lstlisting}
from collections import Counter
import re   # for alternative tokenization only

texts = ['This is the first text text text first', 'And another text yeah yeah']

# split on spaces
tokenized_texts = [t.split() for t in texts] 
# alternative that splits on all "non-word" characters: 
# tokenized_texts = [re.split(r"\W",t) for t in texts] 

c = Counter(tokenized_texts[0]) 
print(c.most_common(3))

c2 = Counter(tokenized_texts[1]) 
print(c2.most_common(3)) 

\end{lstlisting}

\texttt{[('text', 3), ('first', 2), ('This', 1)]}

\texttt{[('yeah', 2), ('And', 1), ('another', 1)]}


\end{frame}


\begin{frame}[fragile]{Some preprocessing}{(more about this later today)}
\textbf{\textcolor{red}{What do we have to improve?}}
\pause


lowercasing
\begin{lstlisting}
texts2 = [t.lower() for t in texts]
\end{lstlisting}

removing punctuation (method 1)
\begin{lstlisting}
texts3 = [t.replace('.','').replace(',','').replace('!','') for t in texts]   
\end{lstlisting}

removing punctuation (method 2)
\begin{lstlisting}
import string
trans = str.maketrans('', '', string.punctuation)
texts4 = [t.translate(trans) for t in texts]   
\end{lstlisting}
\end{frame}


\begin{frame}{Stopword removal: What and why?}
\begin{block}{Why remove stopwords?}
\begin{itemize}
\item If we want to identify key terms (e.g., by means of a word count), we are not interested in them
\item In many analyses, irrelevant information will dominate the picture
\item By removing them, we make our data and our models simpler and smaller
\end{itemize}
\end{block}
\end{frame}


\begin{frame}[fragile]{Stopword removal}

e.g., with a \emph{list comprehension} and the \texttt{.join()} method

\begin{lstlisting}
mystopwords = ['he', 'her', 'a', 'one', 'the']
t ='He gives her a beer and a cigarette.'
t2 = " ".join([w for w in t.split() if w.lower() not in mystopwords])
\end{lstlisting}

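\texttt{t2} is now ``gives beer and cigarette.'' (\texttt{and} is not in our stopword list, and the period stays attached to the last word).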

\end{frame}


\begin{frame}[fragile]{ngrams}
Instead of just looking at single words (unigrams), we can also use adjacent words (bigrams).
\begin{lstlisting}
import nltk
texts = ['This is the first text text text first', 'And another text yeah yeah']
texts_bigrams = [["_".join(tup) for tup in nltk.ngrams(t.split(),2)] for t in texts]
print(texts_bigrams)
\end{lstlisting}
\texttt{[['This\_is',
	'is\_the',
	'the\_first',
	'first\_text',
	'text\_text',
	'text\_text',
	'text\_first'],
	['And\_another', 'another\_text', 'text\_yeah', 'yeah\_yeah']]
}

Typically, we would combine both.
\pause
\textbf{\textcolor{red}{What do you think? Why is this useful? (and what may be drawbacks?)}}
\end{frame}


\begin{frame}{NLP: What and why?}
\begin{block}{Why parse sentences?}
\begin{itemize}
\item To find out what grammatical function words have
\item and to get closer to the meaning.
\end{itemize}
\end{block}
\end{frame}

\begin{frame}[fragile]{Parsing a sentence}
\begin{lstlisting}
import nltk
sentence = "At eight o'clock on Thursday morning, Arthur didn't feel very good."
tokens = nltk.word_tokenize(sentence)
print (tokens)
\end{lstlisting}

\texttt{nltk.word\_tokenize(sentence)} is similar to \texttt{sentence.split()}, but compare the handling of punctuation and of \texttt{didn't} in the output:
\begin{lstlisting}
['At', 'eight', "o'clock", 'on', 'Thursday', 'morning', ',', 'Arthur', 'did', "n't", 'feel', 'very', 'good', '.']
\end{lstlisting}
\end{frame}


\begin{frame}[fragile]{Parsing a sentence}
Now, as the next step, you can ``tag'' the tokenized sentence:
\begin{lstlisting}
tagged = nltk.pos_tag(tokens)
print (tagged[0:6])
\end{lstlisting}
gives you the following:
\begin{lstlisting}
[('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
('Thursday', 'NNP'), ('morning', 'NN')]
\end{lstlisting}

\onslide<2->{
And you could get the part-of-speech tag of ``morning'' with \texttt{tagged[5][1]}!
}

\end{frame}


\begin{frame}{More NLP}
\Huge{Look at \url{http://nltk.org}}

\Huge{Look at \url{http://spacy.io}}

\end{frame}


\begin{frame}[fragile]{Example: Named Entity Recognition with spacy}
Terminal:

\begin{lstlisting}
sudo pip3 install spacy
sudo python3 -m spacy download nl    # or en, de, fr ....
\end{lstlisting}

Python:

\begin{lstlisting}
import spacy
nlp = spacy.load('nl')
doc = nlp('De docent heet Damian, en hij geeft vandaag les. Daarnaast is hij een onderzoeker, net zoals Anne. Ze werken allebei op de UvA')
for ent in doc.ents:
    print(ent.text,ent.label_)
\end{lstlisting}

returns:

\begin{lstlisting}
Damian MISC
Anne PER
UvA LOC
\end{lstlisting}  

\end{frame}


\begin{frame}[fragile]{Example: Lemmatization}

Lemmatization gives you the words in the form in which you would look them up in a good old dictionary.

\begin{lstlisting}
import spacy
nlp = spacy.load('en')
doc = nlp("I am running while generously greeting my neighbors")
lemmatized = " ".join([word.lemma_ for word in doc])
print(lemmatized)
\end{lstlisting}

returns:

\begin{lstlisting}
-PRON- be run while generously greet -PRON- neighbor
\end{lstlisting}  

\end{frame}


\begin{frame}[standout]
	The last example, \texttt{spacy}, in fact uses models that are trained very much like the techniques we will discuss in this course.
\end{frame}


\section{Takeaways}

\begin{frame}{Takeaways}
\begin{itemize}[<+->]
	\item We can organize data either in dataframes (pandas) or in lists, dictionaries, or similar (native Python)
	\item There are methods (which are associated with an object) and functions (which are not). Methods are cool because we can discover them with tab completion
	\item If a function takes, say, a string as input, we cannot apply it to a list. But if we have a list of strings, we can use a list comprehension.
	\item We can structure our code with if/elif/else conditions, for loops, and try/except control structures

\end{itemize}

\end{frame}	


\begin{frame}{Takeaways}
	\begin{itemize}[<+->]
		\item There is something called ``generator'' that you can loop over like a list, but the next item is only generated once it is used. This way, you can process data that are larger than your memory, and you can start processing before all data are in memory.
	\end{itemize}
	
\end{frame}	

\question{Everybody up to speed? Ready to get started?}


\end{document}