This repository has been archived by the owner on May 1, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Major refactor for better error messages and new Duolingo APIs
- Loading branch information
Showing
11 changed files
with
264 additions
and
123 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,30 @@ | ||
import java.io.{PrintWriter, File} | ||
|
||
class AnkiExporter(f: File) { | ||
def write(words: Seq[Word]) { | ||
val pw = new PrintWriter(f, "UTF-8") | ||
/** Joins `s` with `sep` and wraps the result in double quotes.
  *
  * Any double quote inside the joined text is doubled (`"` becomes `""`) so
  * that it cannot terminate the quoted field early in the exported file.
  *
  * @param sep separator placed between the elements of `s`
  * @param s   the values to join into a single quoted field
  * @return the quoted field, e.g. `"a b"` for `quote(" ")(Seq("a", "b"))`
  */
def quote(sep: String)(s: Seq[String]): String =
  "\"" + s.mkString(sep).replace("\"", "\"\"") + "\""
|
||
/** Writes the scraping log followed by one record per word to the file `f`.
  *
  * The log lines from `Log.lines` are written first, each prefixed with `# `.
  * Every word then becomes one line of three `;`-separated quoted fields
  * (learned forms, native forms, tags) followed by an empty line.
  *
  * @param words the words to export
  * @return `Right(())` when everything was written, otherwise `Left` with a
  *         description of the failure
  */
def write(words: Seq[Word]): Either[String, Unit] = {
  import scala.util.control.NonFatal
  try {
    val pw = new PrintWriter(f, "UTF-8")
    try {
      pw.write("#### Start of scraping log\n")
      for (logline <- Log.lines) pw.write("# " + logline + "\n")
      pw.write("#### End of scraping log, data coming up\n")
      for (word <- words) {
        val fields = Seq(word.learned, word.native).map(quote("\n")) :+ quote(" ")(word.tags)
        pw.write(fields.mkString(";") + "\n\n")
      }
      // PrintWriter never throws on I/O failure; it only records an error flag,
      // so the flag has to be checked explicitly to report a failed write.
      if (pw.checkError()) Left(s"Failed to write to $f")
      else Right(())
    } finally {
      pw.close()
    }
  } catch {
    // Only recoverable failures become a Left; fatal JVM errors propagate.
    case NonFatal(e) => Left(e.toString)
  }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,42 @@ | ||
class Constants { | ||
val DUOLINGO_AUTH_HEADER = "auth_tkt" | ||
/** String constants for talking to Duolingo, grouped by endpoint. */
object Constants {

  object Duolingo {
    val DOMAIN: String = "www.duolingo.com"
    val BASE_URL: String = "https://" + DOMAIN

    /** Names used by the login request and its response. */
    object Login {
      // Used elsewhere in this code base as the name of the auth cookie.
      val AUTH_HEADER: String = "auth_tkt"

      object Request {
        val URL: String = BASE_URL + "/login"

        object Params {
          val USERNAME: String = "login"
          val PASSWORD: String = "password"
        }
      }

      object Response {
        object Failure {
          val FAILURE: String = "failure"
          val MESSAGE: String = "message"
        }

        object Success {
          val RESPONSE: String = "response"
          val OK: String = "OK"
          val USERNAME: String = "username"
          val USER_ID: String = "user_id"
        }
      }
    }

    /** Flashcards endpoint and its query parameter names. */
    object Flashcards {
      val URL: String = BASE_URL + "/api/1/flashcards"

      object Params {
        val COUNT: String = "n"
        val ALLOW_PARTIAL_DECK: String = "allow_partial_deck"
      }
    }

    /** Vocabulary endpoints. */
    object Vocabulary {
      object Overview {
        val URL: String = BASE_URL + "/vocabulary/overview"
      }
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import com.ning.http.client.{Response, Cookie} | ||
import dispatch._, Defaults._ | ||
import org.json4s._ | ||
import org.json4s.native.JsonMethods._ | ||
import org.json4s.DefaultFormats | ||
|
||
/** Fetches the flashcard deck from the Duolingo flashcards endpoint.
  *
  * @param authToken value sent as the auth cookie with every request
  */
class DuolingoFlashcardScraper(authToken: String) {
  import Constants.Duolingo.Flashcards._
  import Utils._

  /** Fetches flashcards, doubling `limit` and retrying while the deck looks truncated.
    *
    * A result holding fewer items than `limit` is taken to be complete. A
    * result that fills the limit may have been cut off, so the request is
    * repeated with twice the limit. Every outcome is logged via `Log.log`.
    *
    * @param limit maximum number of items to request
    * @return the parsed deck, or a `Left` describing why it could not be fetched
    */
  def fetch(limit: Long = 10000): Either[String, Flashcards] =
    doFetch(limit) match {
      case r @ Right(f) =>
        val fetched = f.flashcard_data.length
        val nextLimit = limit * 2
        // Stop when the deck is smaller than the limit, or when doubling cannot
        // grow the limit (zero, negative or overflowing), which would loop forever.
        if (fetched < limit || nextLimit <= limit) {
          Log.log(s"Fetched $fetched flashcard items. Native language: ${f.ui_language}. Learned language: ${f.learning_language}.")
          r
        } else {
          Log.log(s"Fetched $fetched flashcard items, but there seem to be more. Trying again with a higher limit.")
          fetch(nextLimit)
        }
      case l @ Left(msg) =>
        Log.log(msg)
        l
    }

  /** Performs one blocking request for at most `limit` items and parses the response.
    *
    * @param limit value sent as the item-count query parameter
    * @return the parsed deck, or a `Left` with the transport or parse failure
    */
  protected def doFetch(limit: Long): Either[String, Flashcards] = {
    val thisUrl = url(URL)
      .addCookie(new Cookie(Constants.Duolingo.DOMAIN, Constants.Duolingo.Login.AUTH_HEADER, authToken, null, -1, true))
      .addQueryParameter(Params.COUNT, limit.toString)
      .addQueryParameter(Params.ALLOW_PARTIAL_DECK, "true")
      .addCommonHeaders()

    implicit val format: Formats = DefaultFormats
    Log.log(s"Fetching flashcard items from ${thisUrl.url}")
    Http(thisUrl).either.apply() match {
      case Right(r: Response) =>
        val body = r.getResponseBody
        // parseOpt yields None for a non-JSON body (e.g. an HTML error page)
        // instead of throwing, so the failure is reported as a Left.
        parseOpt(body)
          .flatMap(_.extractOpt[Flashcards])
          .toRight(s"Failed to parse response: $body")
      case Left(ex: Throwable) => Left(s"Failed to fetch flashcard items: $ex")
    }
  }
}
|
||
/** Flashcard deck as extracted from the flashcards response.
  *
  * Field names are kept in snake_case, presumably to match the JSON keys of
  * the response during extraction (verify against the API payload).
  *
  * @param ui_language       logged by the scraper as the native language
  * @param learning_language logged by the scraper as the learned language
  * @param flashcard_data    the individual flashcards
  */
case class Flashcards(
  ui_language: String,
  learning_language: String,
  flashcard_data: Seq[Flashcard]
)

/** A single flashcard.
  *
  * @param ui_words       presumably the word forms in the UI (native) language
  * @param learning_words presumably the word forms in the learned language
  */
case class Flashcard(
  ui_words: Seq[String],
  learning_words: Seq[String]
)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
import com.ning.http.client.{Response, Cookie} | ||
import dispatch._, Defaults._ | ||
import org.json4s._ | ||
import org.json4s.native.JsonMethods._ | ||
import org.json4s.DefaultFormats | ||
|
||
/** Fetches the vocabulary overview from Duolingo.
  *
  * @param authToken value sent as the auth cookie with every request
  */
class DuolingoVocabularyScraper(authToken: String) {
  import Constants.Duolingo.Vocabulary.Overview._
  import Utils._

  /** Fetches the vocabulary overview and logs the outcome via `Log.log`.
    *
    * @return the parsed overview, or a `Left` describing why it could not be fetched
    */
  def fetch(): Either[String, Vocabulary] =
    doFetch() match {
      case r @ Right(v) =>
        Log.log(s"Fetched ${v.vocab_overview.length} words from vocabulary overview. Native language: ${v.from_language}. Learned language: ${v.learning_language}.")
        r
      case l @ Left(msg) =>
        Log.log(msg)
        l
    }

  /** Performs one blocking request for the overview and parses the response.
    *
    * @return the parsed overview, or a `Left` with the transport or parse failure
    */
  def doFetch(): Either[String, Vocabulary] = {
    val thisUrl = url(URL)
      .addCookie(new Cookie(Constants.Duolingo.DOMAIN, Constants.Duolingo.Login.AUTH_HEADER, authToken, null, -1, true))
      .addCommonHeaders()

    implicit val format: Formats = DefaultFormats
    Log.log(s"Fetching vocabulary overview from ${thisUrl.url}")
    Http(thisUrl).either.apply() match {
      case Right(r: Response) =>
        val body = r.getResponseBody
        // parseOpt yields None for a non-JSON body (e.g. an HTML error page)
        // instead of throwing, so the failure is reported as a Left.
        parseOpt(body)
          .flatMap(_.extractOpt[Vocabulary])
          .toRight(s"Failed to parse response: $body")
      case Left(ex: Throwable) => Left(s"Failed to fetch vocabulary overview: $ex")
    }
  }

}
|
||
/** Vocabulary overview as extracted from the overview response.
  *
  * Field names are kept in snake_case, presumably to match the JSON keys of
  * the response during extraction (verify against the API payload).
  *
  * @param from_language     logged by the scraper as the native language
  * @param learning_language logged by the scraper as the learned language
  * @param vocab_overview    the individual vocabulary entries
  */
case class Vocabulary(
  from_language: String,
  learning_language: String,
  vocab_overview: Seq[VocabularyItem]
)

/** One entry of the vocabulary overview.
  *
  * @param word_string     the word itself
  * @param pos             presumably the part of speech (TODO confirm)
  * @param skill           name of the skill the word belongs to
  * @param skill_url_title presumably the URL-friendly form of the skill name (TODO confirm)
  */
case class VocabularyItem(
  word_string: String,
  pos: String,
  skill: String,
  skill_url_title: String
)
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.