Skip to content
This repository has been archived by the owner on May 1, 2021. It is now read-only.

Commit

Permalink
Major refactor for better error messages and new Duolingo APIs
Browse files Browse the repository at this point in the history
  • Loading branch information
abesto committed Jan 18, 2015
1 parent 1c02775 commit 3db9705
Show file tree
Hide file tree
Showing 11 changed files with 264 additions and 123 deletions.
33 changes: 22 additions & 11 deletions src/main/scala/AnkiExporter.scala
Original file line number Diff line number Diff line change
@@ -1,19 +1,30 @@
import java.io.{PrintWriter, File}

class AnkiExporter(f: File) {
def write(words: Seq[Word]) {
val pw = new PrintWriter(f, "UTF-8")
def quote(sep: String)(s: Seq[String]) = '"' + s.mkString(sep) + '"'

def write(words: Seq[Word]): Either[String, Unit] = {
try {
pw.write("#### Start of scraping log\n")
for (logline <- Log.lines) pw.write("# " + logline + "\n")
pw.write("#### End of scraping log, data coming up\n")
for (word <- words) {
pw.write(
"\"" + word.foreign + "\";\"" + word.translations.mkString("\n") + "\";\"" + word.skill + "\"\n\n"
)
val pw = new PrintWriter(f, "UTF-8")
try {
pw.write("#### Start of scraping log\n")
for (logline <- Log.lines) pw.write("# " + logline + "\n")
pw.write("#### End of scraping log, data coming up\n")
for (word <- words) {
pw.write(
Seq(word.learned, word.native)
.map(quote("\n"))
.:+(quote(" ")(word.tags))
.mkString(";")
.++("\n\n")
)
}
Right()
} finally {
pw.close()
}
} finally {
pw.close()
} catch {
case e: Throwable => Left(e.toString)
}
}
}
43 changes: 41 additions & 2 deletions src/main/scala/Constants.scala
Original file line number Diff line number Diff line change
@@ -1,3 +1,42 @@
class Constants {
val DUOLINGO_AUTH_HEADER = "auth_tkt"
/**
 * String constants used when talking to Duolingo: host and URLs, request
 * parameter names, and the keys/values looked for in responses.
 */
object Constants {

  object Duolingo {
    /** Host name; also used as the cookie domain by the scrapers. */
    val DOMAIN: String = "www.duolingo.com"

    /** Scheme + host; every endpoint URL below is built on top of this. */
    val BASE_URL: String = "https://" + DOMAIN

    /** Login endpoint: request parameters and the fields of its JSON reply. */
    object Login {
      /** Name of the response cookie whose value is the auth token. */
      val AUTH_HEADER: String = "auth_tkt"

      object Request {
        val URL: String = BASE_URL + "/login"

        /** Names of the form parameters sent with the login request. */
        object Params {
          val USERNAME: String = "login"
          val PASSWORD: String = "password"
        }
      }

      object Response {
        /** Field names of a failed-login reply. */
        object Failure {
          val FAILURE: String = "failure"
          val MESSAGE: String = "message"
        }

        /** Field names (and the expected `response` value) of a successful reply. */
        object Success {
          val RESPONSE: String = "response"
          val OK: String = "OK"
          val USERNAME: String = "username"
          val USER_ID: String = "user_id"
        }
      }
    }

    /** Flashcards endpoint and its query parameter names. */
    object Flashcards {
      val URL: String = BASE_URL + "/api/1/flashcards"

      object Params {
        val COUNT: String = "n"
        val ALLOW_PARTIAL_DECK: String = "allow_partial_deck"
      }
    }

    /** Vocabulary endpoints. */
    object Vocabulary {
      object Overview {
        val URL: String = BASE_URL + "/vocabulary/overview"
      }
    }
  }
}
56 changes: 56 additions & 0 deletions src/main/scala/DuolingoFlashcardScraper.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import com.ning.http.client.{Response, Cookie}
import dispatch._, Defaults._
import org.json4s._
import org.json4s.native.JsonMethods._
import org.json4s.DefaultFormats

/**
 * Downloads the flashcard deck from the Duolingo flashcards endpoint.
 *
 * @param authToken value sent as the auth cookie with every request
 */
class DuolingoFlashcardScraper(authToken: String) {
  import Constants.Duolingo.Flashcards._
  import Utils._

  /**
   * Fetches flashcards, asking the server for at most `limit` items.
   *
   * When the response contains as many items as were asked for, the deck may
   * have been cut off at the limit, so the request is repeated with twice the
   * limit. Every outcome is written to the log.
   *
   * @param limit maximum number of items to request
   * @return the parsed deck, or a human-readable error message
   */
  def fetch(limit: Long = 10000): Either[String, Flashcards] =
    doFetch(limit) match {
      case r @ Right(f) =>
        val fetched = f.flashcard_data.length
        val nextLimit = limit * 2
        // A response that fills the limit exactly may be truncated, so only a
        // strictly smaller one proves we have everything. `nextLimit <= limit`
        // (non-positive limit or Long overflow) means doubling cannot make
        // progress; accept the result instead of recursing forever.
        if (fetched < limit || nextLimit <= limit) {
          Log.log(s"Fetched ${fetched} flashcard items. Native language: ${f.ui_language}. Learned language: ${f.learning_language}.")
          r
        } else {
          Log.log(s"Fetched ${fetched} flashcard items, but there seem to be more. Trying again with a higher limit.")
          fetch(nextLimit)
        }
      case l @ Left(msg) =>
        Log.log(msg)
        l
    }

  /**
   * Performs a single request for up to `limit` items and parses the reply.
   *
   * @param limit value sent as the item-count query parameter
   * @return the parsed deck, or an error message if the request failed or the
   *         body could not be parsed/extracted as [[Flashcards]]
   */
  protected def doFetch(limit: Long): Either[String, Flashcards] = {
    val thisUrl = url(URL)
      .addCookie(new Cookie(Constants.Duolingo.DOMAIN, Constants.Duolingo.Login.AUTH_HEADER, authToken, null, -1, true))
      .addQueryParameter(Params.COUNT, limit.toString)
      .addQueryParameter(Params.ALLOW_PARTIAL_DECK, "true")
      .addCommonHeaders()

    implicit val format: Formats = DefaultFormats
    Log.log(s"Fetching flashcard items from ${thisUrl.url}")
    Http(thisUrl).either.apply() match {
      case Right(r: Response) =>
        val body = r.getResponseBody
        // parseOpt (unlike parse) yields None on malformed JSON, e.g. an HTML
        // error page, so the failure is reported as a Left instead of thrown.
        parseOpt(body)
          .flatMap(_.extractOpt[Flashcards])
          .toRight(s"Failed to parse response: $body")
      case Left(ex: Throwable) => Left(s"Failed to fetch flashcard items: $ex")
    }
  }
}

/**
 * Top-level payload extracted from the flashcards response body.
 *
 * Field names are snake_case because instances are produced by json4s
 * extraction; presumably they mirror the JSON keys of the response (confirm
 * against the API before renaming).
 *
 * @param ui_language       logged by the scraper as the "Native language"
 * @param learning_language logged by the scraper as the "Learned language"
 * @param flashcard_data    the individual flashcard items
 */
case class Flashcards(
  ui_language: String,
  learning_language: String,
  flashcard_data: Seq[Flashcard]
)

/**
 * A single flashcard item inside [[Flashcards]].
 *
 * @param ui_words       presumably the words in the UI (native) language — TODO confirm
 * @param learning_words presumably the words in the language being learned — TODO confirm
 */
case class Flashcard(
  ui_words: Seq[String],
  learning_words: Seq[String]
)


27 changes: 15 additions & 12 deletions src/main/scala/DuolingoLogin.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,30 @@ import org.json4s._
import org.json4s.native.JsonMethods._
import scala.collection.JavaConverters._

object DuolingoLogin extends Constants {
val loginUrl = url("https://www.duolingo.com/login").POST
object DuolingoLogin {
import Constants.Duolingo._
import Utils._

val loginUrl = url(Login.Request.URL).POST

def login(username: String, password: String): Either[String, String] = {
Log.log("Trying to log in to Duolingo as " + username)
Http(loginUrl.addParameter("login", username)
.addParameter("password", password)
.addHeader("X-Requested-With", "XMLHttpRequest")).either.apply() match {
Http(loginUrl.addParameter(Login.Request.Params.USERNAME, username)
.addParameter(Login.Request.Params.PASSWORD, password)
.addCommonHeaders()).either.apply() match {
case Right(res: Response) => parse(res.getResponseBody) match {
case JObject(List(
JField("failure", JString(failure)),
JField("message", JString(message)))
JField(Login.Response.Failure.FAILURE, JString(failure)),
JField(Login.Response.Failure.MESSAGE, JString(message)))
) => Log.log("Login failed. Duolingo says: " + message)
Left(message)
case JObject(List(
JField("response", JString("OK")),
JField("username", JString(username2)),
JField("user_id", JString(userId))
JField(Login.Response.Success.RESPONSE, JString(Login.Response.Success.OK)),
JField(Login.Response.Success.USERNAME, JString(username2)),
JField(Login.Response.Success.USER_ID, JString(userId))
)) =>
Log.log(s"Logged in to Duolingo as $username id=$userId")
Right(res.getCookies.asScala.filter({c:Cookie => c.getName == DUOLINGO_AUTH_HEADER}).head.getValue)
Log.log(s"Logged in to Duolingo as $username user_id=$userId")
Right(res.getCookies.asScala.filter({c:Cookie => c.getName == Login.AUTH_HEADER}).head.getValue)
case other =>
Log.log("Unexpected response for login: " + other.toString)
Left("Unexpected response for login. This is a probably a bug in duolingo-to-anki.")
Expand Down
23 changes: 0 additions & 23 deletions src/main/scala/DuolingoTranslationScraper.scala

This file was deleted.

48 changes: 48 additions & 0 deletions src/main/scala/DuolingoVocabularyScraper.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import com.ning.http.client.{Response, Cookie}
import dispatch._, Defaults._
import org.json4s._
import org.json4s.native.JsonMethods._
import org.json4s.DefaultFormats

/**
 * Downloads the vocabulary overview from the Duolingo vocabulary endpoint.
 *
 * @param authToken value sent as the auth cookie with every request
 */
class DuolingoVocabularyScraper(authToken: String) {
  import Constants.Duolingo.Vocabulary.Overview._
  import Utils._

  /**
   * Fetches the vocabulary overview and writes the outcome to the log.
   *
   * @return the parsed vocabulary, or a human-readable error message
   */
  def fetch(): Either[String, Vocabulary] =
    doFetch() match {
      case r @ Right(v) =>
        Log.log(s"Fetched ${v.vocab_overview.length} words from vocabulary overview. Native language: ${v.from_language}. Learned language: ${v.learning_language}.")
        r
      case l @ Left(msg) =>
        Log.log(msg)
        l
    }

  /**
   * Performs the request and parses the reply, without logging the result.
   *
   * @return the parsed vocabulary, or an error message if the request failed
   *         or the body could not be parsed/extracted as [[Vocabulary]]
   */
  def doFetch(): Either[String, Vocabulary] = {
    val thisUrl = url(URL)
      .addCookie(new Cookie(Constants.Duolingo.DOMAIN, Constants.Duolingo.Login.AUTH_HEADER, authToken, null, -1, true))
      .addCommonHeaders()

    implicit val format: Formats = DefaultFormats
    Log.log(s"Fetching vocabulary overview from ${thisUrl.url}")
    Http(thisUrl).either.apply() match {
      case Right(r: Response) =>
        val body = r.getResponseBody
        // parseOpt (unlike parse) yields None on malformed JSON, e.g. an HTML
        // error page, so the failure is reported as a Left instead of thrown.
        parseOpt(body)
          .flatMap(_.extractOpt[Vocabulary])
          .toRight(s"Failed to parse response: $body")
      case Left(ex: Throwable) => Left(s"Failed to fetch vocabulary overview: $ex")
    }
  }

}

/**
 * Top-level payload extracted from the vocabulary overview response body.
 *
 * Field names are snake_case because instances are produced by json4s
 * extraction; presumably they mirror the JSON keys of the response (confirm
 * against the API before renaming).
 *
 * @param from_language     logged by the scraper as the "Native language"
 * @param learning_language logged by the scraper as the "Learned language"
 * @param vocab_overview    the individual vocabulary entries
 */
case class Vocabulary(
  from_language: String,
  learning_language: String,
  vocab_overview: Seq[VocabularyItem]
)

/**
 * One entry of [[Vocabulary.vocab_overview]].
 *
 * @param word_string     the word itself
 * @param pos             presumably the part of speech — TODO confirm
 * @param skill           presumably the name of the skill the word belongs to — TODO confirm
 * @param skill_url_title presumably the URL-safe form of the skill name — TODO confirm
 */
case class VocabularyItem(
  word_string: String,
  pos: String,
  skill: String,
  skill_url_title: String
)
57 changes: 0 additions & 57 deletions src/main/scala/DuolingoWordlistScraper.scala

This file was deleted.

Loading

0 comments on commit 3db9705

Please sign in to comment.