From afab728f53cacd22bb573894ca8215c9fa82975b Mon Sep 17 00:00:00 2001 From: Yaman Qalieh Date: Sun, 13 Feb 2022 13:50:40 -0500 Subject: [PATCH 1/7] [GoogleGroupsBridge] Add new bridge for Google Groups --- bridges/GoogleGroupsBridge.php | 73 ++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 bridges/GoogleGroupsBridge.php diff --git a/bridges/GoogleGroupsBridge.php b/bridges/GoogleGroupsBridge.php new file mode 100644 index 00000000000..613a5d7e555 --- /dev/null +++ b/bridges/GoogleGroupsBridge.php @@ -0,0 +1,73 @@ + array( + 'name' => 'Group id', + 'title' => 'The string that follows /g/ in the URL', + 'exampleValue' => 'announce', + 'required' => true + ), + 'account' => array( + 'name' => 'Account id', + 'title' => 'Some Google groups have an additional id following /a/ in the URL', + 'exampleValue' => 'mozilla.org', + 'required' => false + ) + )); + const CACHE_TIMEOUT = 3600; + + const TEST_DETECT_PARAMETERS = array( + 'https://groups.google.com/a/mozilla.org/g/announce' => array( + 'account' => 'mozilla.org', 'group' => 'announce' + ), + 'https://groups.google.com/g/ansible-project' => array( + 'account' => null, 'group' => 'ansible-project' + ), + ); + + const XPATH_EXPRESSION_ITEM = '//div[@class="yhgbKd"]'; + const XPATH_EXPRESSION_ITEM_TITLE = './/span[@class="o1DPKc"]'; + const XPATH_EXPRESSION_ITEM_CONTENT = './/span[@class="WzoK"]'; + const XPATH_EXPRESSION_ITEM_URI = './/a[@class="ZLl54"]/@href'; + const XPATH_EXPRESSION_ITEM_AUTHOR = './/span[@class="z0zUgf"][last()]'; + const XPATH_EXPRESSION_ITEM_TIMESTAMP = './/div[@class="tRlaM"]'; + const XPATH_EXPRESSION_ITEM_ENCLOSURES = ''; + const XPATH_EXPRESSION_ITEM_CATEGORIES = ''; + const SETTING_FIX_ENCODING = true; + + protected function getSourceUrl() { + $source = self::URI; + + $account = $this->getInput('account'); + if($account) { + $source = $source . '/a/' . $account; + } + return $source . '/g/' . $this->getInput('group'); + } + + public function collectData() { + parent::collectData(); + + # There is duplication in the original item url, so this fixes that + $replace_len = strlen(self::getSourceUrl()) - strlen($this->getInput('group')) - 1; + foreach ($this->items as $item) { + $item->setURI(substr_replace($item->getURI(), self::URI, 0, $replace_len)); + } + } + + const URL_REGEX = '/^https:\/\/groups.google.com(?:\/a\/(?\S+))?(?:\/g\/(?\S+))/'; + + public function detectParameters($url) { + $params = array(); + if(preg_match(self::URL_REGEX, $url, $matches)) { + $params['group'] = $matches['group']; + $params['account'] = $matches['account']; + } + return $params; + } +} From 185fa07835ed79d677a9fc3f040a4657905dc822 Mon Sep 17 00:00:00 2001 From: Yaman Qalieh Date: Sun, 13 Feb 2022 18:25:55 -0500 Subject: [PATCH 2/7] Use defaultLinkTo to account for base href --- bridges/GoogleGroupsBridge.php | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/bridges/GoogleGroupsBridge.php b/bridges/GoogleGroupsBridge.php index 613a5d7e555..09a0aa4a339 100644 --- a/bridges/GoogleGroupsBridge.php +++ b/bridges/GoogleGroupsBridge.php @@ -50,14 +50,8 @@ protected function getSourceUrl() { return $source . '/g/' . $this->getInput('group'); } - public function collectData() { - parent::collectData(); - - # There is duplication in the original item url, so this fixes that - $replace_len = strlen(self::getSourceUrl()) - strlen($this->getInput('group')) - 1; - foreach ($this->items as $item) { - $item->setURI(substr_replace($item->getURI(), self::URI, 0, $replace_len)); - } + protected function provideWebsiteContent() { + return defaultLinkTo(getContents($this->getSourceUrl()), self::URI); } const URL_REGEX = '/^https:\/\/groups.google.com(?:\/a\/(?\S+))?(?:\/g\/(?\S+))/'; From 8c2b6ba25b78b373d187a1658d419cd0ef2a65ac Mon Sep 17 00:00:00 2001 From: Yaman Qalieh Date: Fri, 25 Mar 2022 12:05:23 -0400 Subject: [PATCH 3/7] Remove newline --- bridges/GoogleGroupsBridge.php | 1 - 1 file changed, 1 deletion(-) diff --git a/bridges/GoogleGroupsBridge.php b/bridges/GoogleGroupsBridge.php index 09a0aa4a339..f0e60abcb8f 100644 --- a/bridges/GoogleGroupsBridge.php +++ b/bridges/GoogleGroupsBridge.php @@ -42,7 +42,6 @@ class GoogleGroupsBridge extends XPathAbstract { protected function getSourceUrl() { $source = self::URI; - $account = $this->getInput('account'); if($account) { $source = $source . '/a/' . $account; From 7d9cef5d2d8a08b48c45acc18c47c309c79d9661 Mon Sep 17 00:00:00 2001 From: Yaman Qalieh Date: Fri, 25 Mar 2022 12:08:55 -0400 Subject: [PATCH 4/7] Add Newline --- bridges/GoogleGroupsBridge.php | 1 + 1 file changed, 1 insertion(+) diff --git a/bridges/GoogleGroupsBridge.php b/bridges/GoogleGroupsBridge.php index f0e60abcb8f..09a0aa4a339 100644 --- a/bridges/GoogleGroupsBridge.php +++ b/bridges/GoogleGroupsBridge.php @@ -42,6 +42,7 @@ class GoogleGroupsBridge extends XPathAbstract { protected function getSourceUrl() { $source = self::URI; + $account = $this->getInput('account'); if($account) { $source = $source . '/a/' . $account; From 85e65a7aadcc28a61b53cba7ddacdcc6187b7d03 Mon Sep 17 00:00:00 2001 From: Yaman Qalieh Date: Fri, 25 Mar 2022 18:43:07 -0400 Subject: [PATCH 5/7] Use # delimiter --- bridges/GoogleGroupsBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/GoogleGroupsBridge.php b/bridges/GoogleGroupsBridge.php index 09a0aa4a339..6379d387147 100644 --- a/bridges/GoogleGroupsBridge.php +++ b/bridges/GoogleGroupsBridge.php @@ -54,7 +54,7 @@ protected function provideWebsiteContent() { return defaultLinkTo(getContents($this->getSourceUrl()), self::URI); } - const URL_REGEX = '/^https:\/\/groups.google.com(?:\/a\/(?\S+))?(?:\/g\/(?\S+))/'; + const URL_REGEX = '#^https://groups.google.com(?:/a/(?\S+))?(?:/g/(?\S+))#'; public function detectParameters($url) { $params = array(); From 09d0abaa1307ee310d3abb99f551af93fec0b5de Mon Sep 17 00:00:00 2001 From: Yaman Qalieh Date: Fri, 25 Mar 2022 19:18:11 -0400 Subject: [PATCH 6/7] Fix detectParameters bug --- bridges/GoogleGroupsBridge.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bridges/GoogleGroupsBridge.php b/bridges/GoogleGroupsBridge.php index 6379d387147..1c018772d04 100644 --- a/bridges/GoogleGroupsBridge.php +++ b/bridges/GoogleGroupsBridge.php @@ -61,7 +61,8 @@ public function detectParameters($url) { if(preg_match(self::URL_REGEX, $url, $matches)) { $params['group'] = $matches['group']; $params['account'] = $matches['account']; + return $params; } - return $params; + return null; } } From 3ba0db1436a01200abc3def46445fa11da695386 Mon Sep 17 00:00:00 2001 From: Yaman Qalieh Date: Fri, 25 Mar 2022 19:55:40 -0400 Subject: [PATCH 7/7] Fix for automated testing --- bridges/GoogleGroupsBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/GoogleGroupsBridge.php b/bridges/GoogleGroupsBridge.php index 1c018772d04..6a1ff841bdf 100644 --- a/bridges/GoogleGroupsBridge.php +++ b/bridges/GoogleGroupsBridge.php @@ -9,7 +9,7 @@ class GoogleGroupsBridge extends XPathAbstract { 'group' => array( 'name' => 'Group id', 'title' => 'The string that follows /g/ in the URL', - 'exampleValue' => 'announce', + 'exampleValue' => 'governance', 'required' => true ), 'account' => array(