about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMykyta Holubakha <hilobakho@gmail.com>2017-08-18 08:58:30 +0300
committerMykyta Holubakha <hilobakho@gmail.com>2017-08-18 08:58:30 +0300
commit2b385f3b47656913d98b0df0abc3099554d4ba3a (patch)
treea6e71cbeceb3fba0ee92fd3ade9d8d55c9d6d553
parentFixed iquery (diff)
downloadpomu-2b385f3b47656913d98b0df0abc3099554d4ba3a.tar.gz
pomu-2b385f3b47656913d98b0df0abc3099554d4ba3a.tar.bz2
pomu-2b385f3b47656913d98b0df0abc3099554d4ba3a.zip
Created DataSource class
Implemented a Zugaina fetcher for fetching search results from zugaina
-rw-r--r--pomu/data/datasource.py19
-rw-r--r--pomu/data/zugaina.py60
2 files changed, 79 insertions, 0 deletions
diff --git a/pomu/data/datasource.py b/pomu/data/datasource.py
new file mode 100644
index 0000000..53fcc62
--- /dev/null
+++ b/pomu/data/datasource.py
@@ -0,0 +1,19 @@
+"""
+Base DataSource class
+"""
+
class DataSource:
    """Abstract interface for a searchable package data source.

    A concrete source is constructed with a search query and exposes
    paged search results plus per-item listing and retrieval. All
    methods here are stubs for subclasses to override.
    """

    def __init__(self, query):
        """Initialize the source for *query* (a search string)."""
        pass

    def page_count(self):
        """Return the number of result pages available for the query."""
        pass

    def get_page(self, page):
        """Return the result entries on page number *page*."""
        pass

    def list_items(self, ident):
        """List the items belonging to the result entry *ident*."""
        pass

    def get_item(self, ident):
        """Retrieve a single item identified by *ident*."""
        pass
diff --git a/pomu/data/zugaina.py b/pomu/data/zugaina.py
new file mode 100644
index 0000000..25437fc
--- /dev/null
+++ b/pomu/data/zugaina.py
@@ -0,0 +1,60 @@
+"""
+gpo.zugaina.org searcher and fetcher
+"""
+import lxml.html
+import requests
+
+from pomu.data.datasource import DataSource
+
# Root URL of the gpo.zugaina.org Gentoo overlay search service.
BASE_URL = 'https://gpo.zugaina.org/'
# Search endpoint template; filled via .format(query, page_number).
SBASE_URL = BASE_URL + 'Search?search={}&page={}'
+
class ZugainaDataSource(DataSource):
    """DataSource backed by the gpo.zugaina.org ebuild search service.

    Fetched search pages and item pages are cached in memory for the
    lifetime of the instance, so repeated lookups do not re-hit the
    network.
    """

    def __init__(self, query):
        """Initialize the source for the search string *query*."""
        self.query = query
        self.pagecache = {}  # page number -> raw HTML of that search page
        self.itemcache = {}  # item ident -> raw HTML of that item page
        self.pagecount = -1  # lazily computed; -1 means "not fetched yet"

    def page_count(self):
        """Return the number of result pages (50 results per page)."""
        if self.pagecount > 0:
            return self.pagecount
        text = self.fetch_page(1)
        doc = lxml.html.document_fromstring(text)
        field = doc.xpath('//div[@class="pager"]/span')[0].text
        # The pager text ends with the total result count. It is a string,
        # so convert to int before the ceiling division (the original added
        # 49 to a str, which raises TypeError).
        total = int(field.split(' ')[-1])
        self.pagecount = (total + 49) // 50
        return self.pagecount

    def get_page(self, page):
        """Return a list of (atom, description) tuples for page *page*."""
        text = self.fetch_page(page)
        doc = lxml.html.document_fromstring(text)
        # strip() is a str method, not a free function — the original called
        # an undefined name `strip`.
        return [(x.text.strip(), x.getchildren()[0].text)
                for x in doc.xpath('//div[@id="search_results"]/a/div')]

    def list_items(self, ident):
        """Return a list of (id, package-version, overlay) tuples for the
        result entry *ident*."""
        text = self.fetch_item(ident)
        doc = lxml.html.document_fromstring(text)
        res = []
        for div in doc.xpath('//div[@id="ebuild_list"]/ul/div'):
            id_ = div.xpath('li/a')[0].get('href').split('/')[3]
            # xpath() always returns a list, even for a single match
            pv = div.xpath('li/div/b')[0].text
            # the overlay name is the div's own id attribute
            overlay = div.get('id')
            # list.append takes a single argument — append one tuple
            res.append((id_, pv, overlay))
        # the original built `res` but never returned it
        return res

    def get_item(self, ident):
        """Fetch the raw ebuild text for item *ident*."""
        # the original referenced an undefined name `results`
        return requests.get(BASE_URL + 'AJAX/Ebuild/' + ident).text

    def fetch_item(self, ident):
        """Fetch (and cache) the HTML of the item page for *ident*."""
        if ident in self.itemcache:
            return self.itemcache[ident]
        res = requests.get(BASE_URL + ident).text
        # the original checked the cache but never populated it
        self.itemcache[ident] = res
        return res

    def fetch_page(self, page):
        """Fetch (and cache) the HTML of search result page *page*."""
        if page in self.pagecache:
            return self.pagecache[page]
        res = requests.get(SBASE_URL.format(self.query, page)).text
        self.pagecache[page] = res
        return res