about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMykyta Holubakha <hilobakho@gmail.com>2017-08-18 08:58:30 +0300
committerMykyta Holubakha <hilobakho@gmail.com>2017-08-18 08:58:30 +0300
commit2b385f3b47656913d98b0df0abc3099554d4ba3a (patch)
treea6e71cbeceb3fba0ee92fd3ade9d8d55c9d6d553
parentFixed iquery (diff)
downloadpomu-2b385f3b47656913d98b0df0abc3099554d4ba3a.tar.gz
pomu-2b385f3b47656913d98b0df0abc3099554d4ba3a.tar.bz2
pomu-2b385f3b47656913d98b0df0abc3099554d4ba3a.zip
Created DataSource class
Implemented a Zugaina fetcher for fetching search results from zugaina
-rw-r--r--pomu/data/datasource.py19
-rw-r--r--pomu/data/zugaina.py60
2 files changed, 79 insertions, 0 deletions
diff --git a/pomu/data/datasource.py b/pomu/data/datasource.py
new file mode 100644
index 0000000..53fcc62
--- /dev/null
+++ b/pomu/data/datasource.py
@@ -0,0 +1,19 @@
+"""
+Base DataSource class
+"""
+
class DataSource:
    """Abstract interface for a searchable package data source.

    A concrete source is constructed with a search query and exposes
    paged search results plus per-item listing and retrieval. All
    methods here are stubs for subclasses to override.
    """

    def __init__(self, query):
        """Initialize the source for *query* (a search string)."""
        pass

    def page_count(self):
        """Return the number of result pages available for the query."""
        pass

    def get_page(self, page):
        """Return the result entries on page number *page*."""
        pass

    def list_items(self, ident):
        """List the items belonging to the result entry *ident*."""
        pass

    def get_item(self, ident):
        """Retrieve a single item identified by *ident*."""
        pass
diff --git a/pomu/data/zugaina.py b/pomu/data/zugaina.py
new file mode 100644
index 0000000..25437fc
--- /dev/null
+++ b/pomu/data/zugaina.py
@@ -0,0 +1,60 @@
+"""
+gpo.zugaina.org searcher and fetcher
+"""
+import lxml.html
+import requests
+
+from pomu.data.datasource import DataSource
+
# Root URL of the gpo.zugaina.org Gentoo overlay search service.
BASE_URL = 'https://gpo.zugaina.org/'
# Search endpoint template; filled via .format(query, page_number).
SBASE_URL = BASE_URL + 'Search?search={}&page={}'
+
class ZugainaDataSource(DataSource):
    """DataSource backed by the gpo.zugaina.org ebuild search service.

    Fetched search pages and item pages are cached in memory for the
    lifetime of the instance, so repeated lookups do not re-hit the
    network.
    """

    def __init__(self, query):
        """Initialize the source for the search string *query*."""
        self.query = query
        self.pagecache = {}  # page number -> raw HTML of that search page
        self.itemcache = {}  # item ident -> raw HTML of that item page
        self.pagecount = -1  # lazily computed; -1 means "not fetched yet"

    def page_count(self):
        """Return the number of result pages (50 results per page)."""
        if self.pagecount > 0:
            return self.pagecount
        text = self.fetch_page(1)
        doc = lxml.html.document_fromstring(text)
        field = doc.xpath('//div[@class="pager"]/span')[0].text
        # The pager text ends with the total result count. It is a string,
        # so convert to int before the ceiling division (the original added
        # 49 to a str, which raises TypeError).
        total = int(field.split(' ')[-1])
        self.pagecount = (total + 49) // 50
        return self.pagecount

    def get_page(self, page):
        """Return a list of (atom, description) tuples for page *page*."""
        text = self.fetch_page(page)
        doc = lxml.html.document_fromstring(text)
        # strip() is a str method, not a free function — the original called
        # an undefined name `strip`.
        return [(x.text.strip(), x.getchildren()[0].text)
                for x in doc.xpath('//div[@id="search_results"]/a/div')]

    def list_items(self, ident):
        """Return a list of (id, package-version, overlay) tuples for the
        result entry *ident*."""
        text = self.fetch_item(ident)
        doc = lxml.html.document_fromstring(text)
        res = []
        for div in doc.xpath('//div[@id="ebuild_list"]/ul/div'):
            id_ = div.xpath('li/a')[0].get('href').split('/')[3]
            # xpath() always returns a list, even for a single match
            pv = div.xpath('li/div/b')[0].text
            # the overlay name is the div's own id attribute
            overlay = div.get('id')
            # list.append takes a single argument — append one tuple
            res.append((id_, pv, overlay))
        # the original built `res` but never returned it
        return res

    def get_item(self, ident):
        """Fetch the raw ebuild text for item *ident*."""
        # the original referenced an undefined name `results`
        return requests.get(BASE_URL + 'AJAX/Ebuild/' + ident).text

    def fetch_item(self, ident):
        """Fetch (and cache) the HTML of the item page for *ident*."""
        if ident in self.itemcache:
            return self.itemcache[ident]
        res = requests.get(BASE_URL + ident).text
        # the original checked the cache but never populated it
        self.itemcache[ident] = res
        return res

    def fetch_page(self, page):
        """Fetch (and cache) the HTML of search result page *page*."""
        if page in self.pagecache:
            return self.pagecache[page]
        res = requests.get(SBASE_URL.format(self.query, page)).text
        self.pagecache[page] = res
        return res