diff --git a/Makefile b/Makefile index 0d94189..24aae05 100644 --- a/Makefile +++ b/Makefile @@ -19,14 +19,25 @@ pypi-html: run-tests: python3 -m unittest discover tests/ '*test.py' + +run-type-check: + mypy ./example.py run-coverage: coverage run --source=wikipediaapi -m unittest discover tests/ '*test.py' coverage report -m -release: run-tests pypi-html - if [ "x$(MSG)" = "x" ]; then \ - echo "Use make release MSG='some msg'"; \ +requirements: + pip3 install -r requirements.txt + +requirements-dev: + pip3 install -r requirements-dev.txt + +pre-release-check: run-tests run-coverage pypi-html run-type-check + +release: pre-release-check + if [ "x$(MSG)" = "x" -o "x$(VERSION)" = "x" ]; then \ + echo "Use make release MSG='some msg' VERSION='1.2.3'"; \ exit 1; \ fi; \ version=`grep __version__ wikipediaapi/__init__.py | sed -r 's/.*= \( *(.*), *(.*), *(.*)\)/\1.\2.\3/'`; \ @@ -35,13 +46,43 @@ release: run-tests pypi-html exit 1; \ fi; \ echo "Current version: $$version"; \ - short=`echo $$version | cut -f1-2 -d.`; \ - echo "Short version: $$short"; \ - sed -ri 's/^release = .*/release = "'$$version'"/' conf.py; \ - sed -ri 's/^version = .*/version = "'$$short'"/' conf.py; \ - git commit conf.py -m "Update version to $$version in conf.py"; \ + as_number() { \ + total=0; \ + for p in `echo $$1 | tr "." "\n"`; do \ + total=$$(( $$total * 1000 + $$p )); \ + done; \ + echo $$total; \ + }; \ + number_dots=`echo -n $(VERSION) | sed -r 's/[^.]//g' | wc -c`; \ + if [ ! 
"$${number_dots}" = "2" ]; then \ + echo "Version has to have format X.Y.Z"; \ + echo "Specified version is $(VERSION)"; \ + exit 2; \ + fi; \ + number_version=`as_number $$version`; \ + number_VERSION=`as_number $(VERSION);`; \ + if [ $$number_version -ge $$number_VERSION ]; then \ + echo -n "Specified version $(VERSION) ($$number_VERSION) is lower than"; \ + echo "current version $$version ($$number_version)"; \ + echo "New version has to be greater"; \ + exit 2; \ + fi; \ + has_documentation=`grep -c "^$(VERSION)\\$$" CHANGES.rst`; \ + if [ $$has_documentation -eq 0 ]; then \ + echo "There is no information about $(VERSION) in CHANGES.rst"; \ + exit 3; \ + fi; \ + short_VERSION=`echo $(VERSION) | cut -f1-2 -d.`; \ + commas_VERSION=`echo $(VERSION) | sed -r 's/\./, /g'`; \ + echo "Short version: $$short_VERSION"; \ + sed -ri 's/version=.*/version="'$(VERSION)'",/' setup.py; \ + sed -ri 's/^release = .*/release = "'$(VERSION)'"/' conf.py; \ + sed -ri 's/^version = .*/version = "'$$short_VERSION'"/' conf.py; \ + sed -ri 's/^Current version is: .*/Current version is: "'$(VERSION)'"/' wikipediaapi/__init__.py; \ + sed -ri 's/^__version__ = .*/__version__ = ('"$$commas_VERSION"')/' wikipediaapi/__init__.py; \ + git commit setup.py conf.py wikipediaapi/__init__.py -m "Update version to $(VERSION) for new release."; \ git push; \ - git tag $$version -m "$(MSG)"; \ + git tag $(VERSION) -m "$(MSG)"; \ git push --tags origin master # Catch-all target: route all unknown targets to Sphinx using the new @@ -51,3 +92,4 @@ release: run-tests pypi-html + diff --git a/conf.py b/conf.py index 1855f52..97756e8 100644 --- a/conf.py +++ b/conf.py @@ -57,7 +57,7 @@ # The short X.Y version. version = "0.3" # The full version, including alpha/beta/rc tags. -release = "0.3.5" +release = "0.3.7" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..ca31bce --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,2 @@ +mypy +sphinx diff --git a/requirements.txt b/requirements.txt index f229360..c9eb394 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ requests +typing diff --git a/setup.py b/setup.py index b6f8439..9ce1d8b 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def fix_doc(txt): setup( name='Wikipedia-API', - version='0.3.5', + version="0.3.7", description='Python Wrapper for Wikipedia', long_description=README + '\n\n' + CHANGES, classifiers=[ diff --git a/tests/extract_html_format_test.py b/tests/extract_html_format_test.py index 855981e..bd1eda0 100644 --- a/tests/extract_html_format_test.py +++ b/tests/extract_html_format_test.py @@ -108,3 +108,19 @@ def test_text(self): "

Text for section 5.1\n\n\n

" ) ) + + def test_with_erroneous_edit(self): + page = self.wiki.page('Test_Edit') + self.maxDiff = None + section = page.section_by_title('Section with Edit') + self.assertEqual(section.title, 'Section with Edit') + self.assertEqual( + page.text, + ( + "

Summary text\n\n

\n\n" + + "

Section 1

\n" + + "

Text for section 1

\n\n" + "

Section with Edit

\n" + + "

Text for section with edit\n\n\n

" + ) + ) diff --git a/tests/mock_data.py b/tests/mock_data.py index e13d134..822e8da 100644 --- a/tests/mock_data.py +++ b/tests/mock_data.py @@ -108,6 +108,36 @@ def wikipedia_api_request(page, params): } } }, + 'en:action=query&prop=extracts&titles=Test_Edit&': { + "batchcomplete": "", + "warnings": { + "extracts": { + "*": "\"exlimit\" was too large for a whole article extracts request, lowered to 1." + } + }, + "query": { + "normalized": [ + { + "from": "Test_Edit", + "to": "Test Edit" + } + ], + "pages": { + "4": { + "pageid": 4, + "ns": 0, + "title": "Test Edit", + "extract": ( + "

Summary text\n\n

\n" + + "

Section 1

\n" + + "

Text for section 1

\n\n\n" + + "

Section with EditEdit

\n" + + "

Text for section with edit\n\n\n

" + ) + } + } + } + }, 'en:action=query&inprop=protection|talkid|watched|watchers|visitingwatchers|notificationtimestamp|subjectid|url|readable|preload|displaytitle&prop=info&titles=Test_1&': { "batchcomplete": "", "query": { diff --git a/wikipediaapi/__init__.py b/wikipediaapi/__init__.py index 7dd09d5..a4c7f31 100644 --- a/wikipediaapi/__init__.py +++ b/wikipediaapi/__init__.py @@ -2,7 +2,7 @@ Wikipedia-API is easy to use Python wrapper for `Wikipedias'`_ API. It supports extracting texts, sections, links, categories, translations, etc from Wikipedia. Documentation provides code snippets for the most common use cases. You can learn more at: http://wikipedia-api.readthedocs.io/en/latest/ -Current version is: 0.3.5 +Current version is: "0.3.7" ''' from .wikipedia import * -__version__ = (0, 3, 5) +__version__ = (0, 3, 7) diff --git a/wikipediaapi/wikipedia.py b/wikipediaapi/wikipedia.py index 48eaa3d..4d05947 100644 --- a/wikipediaapi/wikipedia.py +++ b/wikipediaapi/wikipedia.py @@ -1,10 +1,13 @@ import logging import re import requests +from typing import Dict, Any, List log = logging.getLogger(__name__) # https://www.mediawiki.org/wiki/API:Main_page +PagesDict = Dict[str, 'WikipediaPage'] + class ExtractFormat(object): # (Enum): # Wiki: https://goo.gl/PScNVV @@ -65,7 +68,9 @@ class Namespace(object): ExtractFormat.HTML: re.compile( r'\n? *]*?>(]*><\/span>)? *' + '(]*>)? *(]*><\/span>)? *(.*?) *' + - '(<\/span>)?<\/h\d>\n?' + '(<\/span>)?(Edit<\/span>)?<\/h\d>\n?' + # ^^^^ + # Example page with 'Edit' erroneous links: https://bit.ly/2ui4FWs ), # ExtractFormat.PLAIN.value: re.compile(r'\n\n *(===*) (.*?) (===*) *\n'), } @@ -80,7 +85,7 @@ def __init__( 'Wikipedia-API (https://github.com/martin-majlis/Wikipedia-API)' ), timeout=10.0 - ): + ) -> None: ''' Language of the API being requested. 
def _backlinks(
        self,
        page: 'WikipediaPage'
) -> 'WikipediaPage':
    """
    Query the ``backlinks`` list for ``page`` and hand the merged result
    to ``_build_backlinks``.

    The first request asks for ``list=backlinks`` with ``bltitle`` set to
    ``page.title`` and ``bllimit`` set to 500. While a response contains a
    ``continue`` entry, the request is repeated with its ``blcontinue``
    value and the returned ``backlinks`` are appended to the first batch.

    https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bbacklinks
    https://www.mediawiki.org/wiki/API:Backlinks

    :param page: page whose ``title`` is sent as ``bltitle``
    :return: whatever ``_build_backlinks`` returns for the merged data
    :raises KeyError: if a response lacks the ``query`` / ``backlinks``
        keys read below
    """
    params = {
        'action': 'query',
        'list': 'backlinks',
        'bltitle': page.title,
        'bllimit': 500,
    }  # type: Dict[str, Any]

    raw = self._query(
        page,
        params
    )

    self._common_attributes(raw['query'], page)
    # The 'query' dict of the first response doubles as the accumulator
    # for every following batch.
    v = raw['query']
    while 'continue' in raw:
        params['blcontinue'] = raw['continue']['blcontinue']
        raw = self._query(
            page,
            params
        )
        v['backlinks'] += raw['query']['backlinks']

    return self._build_backlinks(v, page)


def _build_backlinks(
        self,
        extract: Dict[str, Any],
        page: 'WikipediaPage'
) -> 'WikipediaPage':
    """
    Fill ``page._backlinks`` from an already fetched ``backlinks`` list.

    ``_common_attributes`` is applied to ``extract`` first. Afterwards one
    ``WikipediaPage`` is created per entry of ``extract['backlinks']``
    (from its ``title`` and ``ns``, with the language of ``page``) and
    stored in ``page._backlinks`` under the entry's title.

    :param extract: dict holding a ``backlinks`` list of dicts that each
        provide ``title`` and ``ns``
    :param page: page that receives the backlinks
    :return: the same ``page`` object
    """
    self._common_attributes(extract, page)
    for backlink in extract['backlinks']:
        page._backlinks[backlink['title']] = WikipediaPage(
            wiki=self,
            title=backlink['title'],
            ns=backlink['ns'],
            language=page.language
        )

    return page
@property
def langlinks(self) -> PagesDict:
    """
    Return ``self._langlinks``.

    ``self._fetch('langlinks')`` is called first unless
    ``self._called['langlinks']`` is already truthy.
    """
    if self._called['langlinks']:
        return self._langlinks
    self._fetch('langlinks')
    return self._langlinks


@property
def links(self) -> PagesDict:
    """
    Return ``self._links``.

    ``self._fetch('links')`` is called first unless
    ``self._called['links']`` is already truthy.
    """
    if self._called['links']:
        return self._links
    self._fetch('links')
    return self._links


@property
def backlinks(self) -> PagesDict:
    """
    Return ``self._backlinks``.

    ``self._fetch('backlinks')`` is called first unless
    ``self._called['backlinks']`` is already truthy.
    """
    if self._called['backlinks']:
        return self._backlinks
    self._fetch('backlinks')
    return self._backlinks


@property
def categories(self) -> PagesDict:
    """
    Return ``self._categories``.

    ``self._fetch('categories')`` is called first unless
    ``self._called['categories']`` is already truthy.
    """
    if self._called['categories']:
        return self._categories
    self._fetch('categories')
    return self._categories


@property
def categorymembers(self) -> PagesDict:
    """
    Return ``self._categorymembers``.

    ``self._fetch('categorymembers')`` is called first unless
    ``self._called['categorymembers']`` is already truthy.
    """
    if self._called['categorymembers']:
        return self._categorymembers
    self._fetch('categorymembers')
    return self._categorymembers