diff --git a/kicost/distributors/__init__.py b/kicost/distributors/__init__.py
new file mode 100644
index 000000000..b017cfb71
--- /dev/null
+++ b/kicost/distributors/__init__.py
@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+
+__author__ = 'XESS Corporation'
+__email__ = 'info@xess.com'
+
+from random import randint
+
+# Global constants for distributor site scraping.
+import http.client # For web scraping exceptions.
+try:  # Python 3 module layout.
+    from urllib.parse import urlencode, quote as urlquote, urlsplit, urlunsplit
+    import urllib.request
+    from urllib.request import urlopen, Request, URLError
+except ImportError:  # Python 2: quote() lives in urllib, URLError in urllib2.
+    from urlparse import urlsplit, urlunsplit
+    from urllib import urlencode, quote as urlquote
+    from urllib2 import urlopen, Request, URLError
+HTML_RESPONSE_RETRIES = 2 # Num of retries for getting part data web page.
+WEB_SCRAPE_EXCEPTIONS = (URLError, http.client.HTTPException)
+
+
+def get_user_agent():
+    '''Return a random Chrome user-agent string (more: http://www.useragentstring.com/pages/useragentstring.php).'''
+    agents = (
+        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
+        "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
+        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
+        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
+        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
+        "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
+        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
+        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
+        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
+        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
+        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
+        "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
+        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
+        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
+    )
+    pick = randint(0, len(agents) - 1)
+    return agents[pick]
+
+def FakeBrowser(url):
+    '''Build a Request for url carrying an en-US Accept-Language and a random User-agent header.'''
+    request = Request(url, headers={'Accept-Language': 'en-US'})
+    request.add_header('User-agent', get_user_agent())
+    return request
+
+
+# The global dictionary of distributor information starts out empty; it is
+# filled in as the distributor sub-packages are imported below.
+distributors = {}
+
+import os
+
+# Each distributor lives in its own sub-directory next to this file.
+directory = os.path.dirname(__file__)
+
+# Search for the distributor modules and import them.
+# Importing a distributor package runs its __init__, which is expected to
+# add an entry to the distributors dictionary above.
+for module in os.listdir(directory):
+    abs_module = os.path.join(directory, module)
+
+    # Skip anything that is not a directory, as well as directories such as
+    # __pycache__ whose names start with a double underscore.
+    if not os.path.isdir(abs_module) or module.startswith('__'):
+        continue
+
+    # Relative import (level=1) of the sub-package of this package.
+    __import__(module, globals(), locals(), [], level=1)
diff --git a/kicost/distributors/digikey/__init__.py b/kicost/distributors/digikey/__init__.py
new file mode 100644
index 000000000..f5edaa7ec
--- /dev/null
+++ b/kicost/distributors/digikey/__init__.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+
+__author__ = 'XESS Corporation'
+__email__ = 'info@xess.com'
+
+from .digikey import *
+
+# Place information about this distributor into the distributor dictionary.
+from .. import distributors
+# Register this distributor under the key 'digikey'. Assigning the key
+# directly has the same effect on the shared dictionary as calling update()
+# with a one-entry dictionary.
+distributors['digikey'] = {
+    'module': 'digikey', # The directory name containing this file.
+    'scrape': 'web', # Allowable values: 'web' or 'local'.
+    'label': 'Digi-Key', # Distributor label used in spreadsheet columns.
+    'order_cols': ['purch', 'part_num', 'refs'], # Sort-order for online orders.
+    'order_delimiter': ',', # Delimiter for online orders.
+
+    # Formatting for distributor header in worksheet.
+    'wrk_hdr_format': {
+        'font_size': 14,
+        'font_color': 'white',
+        'bold': True,
+        'align': 'center',
+        'valign': 'vcenter',
+        'bg_color': '#CC0000' # Digi-Key red.
+    }
+}
+
diff --git a/kicost/distributors/digikey/digikey.py b/kicost/distributors/digikey/digikey.py
new file mode 100644
index 000000000..edec1f442
--- /dev/null
+++ b/kicost/distributors/digikey/digikey.py
@@ -0,0 +1,286 @@
+# MIT license
+#
+# Copyright (C) 2015 by XESS Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+# Inserted by Pasteurize tool.
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from builtins import zip
+from builtins import range
+from builtins import int
+from builtins import str
+from future import standard_library
+standard_library.install_aliases()
+
+import future
+
+import re
+import difflib
+from bs4 import BeautifulSoup
+import http.client # For web scraping exceptions.
+from .. import urlquote, urlsplit, urlunsplit, urlopen, Request
+from .. import HTML_RESPONSE_RETRIES
+from .. import WEB_SCRAPE_EXCEPTIONS
+from .. import FakeBrowser
+from ...kicost import PartHtmlError
+from ...kicost import logger, DEBUG_OVERVIEW, DEBUG_DETAILED, DEBUG_OBSESSIVE
+
+
+def get_price_tiers(html_tree):
+    '''Return {quantity (int): unit price (float)} parsed from the Digikey product page.
+    The dict is empty when the page has no 'product-dollars' table.'''
+    price_tiers = {}
+    try:
+        rows = html_tree.find('table', id='product-dollars').find_all('tr')
+    except AttributeError:  # This happens when no pricing info is found in the tree.
+        logger.log(DEBUG_OBSESSIVE, 'No Digikey pricing information found!')
+        return price_tiers  # Return empty price tiers.
+    for tr in rows:
+        try:
+            td = tr.find_all('td')
+            qty = int(re.sub(r'[^0-9]', '', td[0].text))
+            price_tiers[qty] = float(re.sub(r'[^0-9.]', '', td[1].text))
+        except (TypeError, AttributeError, ValueError, IndexError):
+            continue  # Happens when there's no usable <td> pair in the table row.
+    return price_tiers
+
+
+def part_is_reeled(html_tree):
+    '''Tell whether a Digi-Key product page describes a reeled or Digi-reeled part.
+
+    True when the smallest price-break quantity is at least 100, or when the
+    page has a 'product-details-reel-pricing' table; False otherwise.'''
+    break_qtys = get_price_tiers(html_tree)
+    if break_qtys and min(break_qtys) >= 100:
+        return True
+    return html_tree.find('table', id='product-details-reel-pricing') is not None
+
+
+def get_part_num(html_tree):
+    '''Return the whitespace-stripped Digikey part number, or '' if the page has none.'''
+    try:
+        cell_text = html_tree.find('td', id='reportPartNumber').text
+    except AttributeError:  # No 'reportPartNumber' cell in the tree.
+        logger.log(DEBUG_OBSESSIVE, 'No Digikey part number found!')
+        return ''
+    return re.sub(r'\s', '', cell_text)
+
+
+def get_qty_avail(html_tree):
+    '''Get the available quantity of the part from the Digikey product page.
+
+    Returns None when the page has no quantityAvailable/dkQty element (the part
+    is probably not stocked), otherwise the quantity as an int, falling back to
+    0 when the element holds nothing decipherable.'''
+    try:
+        qty_tree = html_tree.find('td', id='quantityAvailable').find('span', id='dkQty')
+        qty_str = qty_tree.text
+    except AttributeError:
+        # No quantity found (not even 0): returning None keeps the part out of
+        # the spreadsheet for this distributor.
+        return None
+    try:
+        qty_str = re.search('([0-9,]*)', qty_str, re.IGNORECASE).group(1)
+        return int(re.sub('[^0-9]', '', qty_str))
+    except (AttributeError, ValueError):
+        logger.log(DEBUG_OBSESSIVE, 'No Digikey part quantity found!')
+    try:
+        # This may be an input field for requesting a quantity; use its value.
+        return int(qty_tree.find('input', type='text').get('value'))
+    except (AttributeError, TypeError, ValueError):
+        return 0  # TypeError: the input has no 'value' attribute (get() gave None).
+
+
+def get_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None):
+    '''Find the Digikey HTML page for a part number and return the URL and parse tree.
+
+    dist -- distributor name; used here only in log messages and recursive calls.
+    pn -- part number to search for; it must also appear in the fetched HTML.
+    extra_search_terms -- text appended to pn in the site-search query.
+    url -- page to fetch instead of running a search; a leading '/' is taken
+        relative to http://www.digikey.com.
+    descend -- number of further link levels (product tables, alternate
+        packagings) that may still be followed.
+    local_part_html -- not used by this distributor.
+
+    Returns (tree, url): the parsed product page and the URL it came from.
+    Raises PartHtmlError when no product page for the part can be obtained.'''
+
+    def merge_price_tiers(main_tree, alt_tree):
+        '''Merge the price tiers from the alternate-packaging tree into the main tree.'''
+        try:
+            insertion_point = main_tree.find('table', id='product-dollars').find('tr')
+            for tr in alt_tree.find('table', id='product-dollars').find_all('tr'):
+                insertion_point.insert_after(tr)
+        except AttributeError:
+            logger.log(DEBUG_OBSESSIVE, 'Problem merging price tiers for Digikey part {} with alternate packaging!'.format(pn))
+
+    def merge_qty_avail(main_tree, alt_tree):
+        '''Merge the quantities from the alternate-packaging tree into the main tree.'''
+        try:
+            main_qty = get_qty_avail(main_tree)
+            alt_qty = get_qty_avail(alt_tree)
+            if main_qty is None:
+                merged_qty = alt_qty
+            elif alt_qty is None:
+                merged_qty = main_qty
+            else:
+                merged_qty = max(main_qty, alt_qty)
+            if merged_qty is not None:
+                insertion_point = main_tree.find('td', id='quantityAvailable').find('span', id='dkQty')
+                insertion_point.string = '{}'.format(merged_qty)
+        except AttributeError:
+            logger.log(DEBUG_OBSESSIVE, 'Problem merging available quantities for Digikey part {} with alternate packaging!'.format(pn))
+
+    # Use the part number to lookup the part using the site search function, unless a starting url was given.
+    if url is None:
+        url = 'http://www.digikey.com/scripts/DkSearch/dksus.dll?WT.z_header=search_go&lang=en&keywords=' + urlquote(
+            pn + ' ' + extra_search_terms,
+            safe='')
+    elif url.startswith('/'):  # startswith() also copes with an empty url.
+        url = 'http://www.digikey.com' + url
+
+    # Open the URL, read the HTML from it, and parse it into a tree structure.
+    req = FakeBrowser(url)
+    for _ in range(HTML_RESPONSE_RETRIES):
+        try:
+            response = urlopen(req)
+            html = response.read()
+            break
+        except WEB_SCRAPE_EXCEPTIONS:
+            logger.log(DEBUG_DETAILED,'Exception while web-scraping {} from {}'.format(pn, dist))
+    else: # Couldn't get a good read from the website.
+        logger.log(DEBUG_OBSESSIVE,'No HTML page for {} from {}'.format(pn, dist))
+        raise PartHtmlError
+
+    # Abort if the part number isn't in the HTML somewhere.
+    # (Only use the numbers and letters to compare PN to HTML.)
+    if re.sub(r'[\W_]','',str.lower(pn)) not in re.sub(r'[\W_]','',str.lower(str(html))):
+        logger.log(DEBUG_OBSESSIVE,'No part number {} in HTML page from {}'.format(pn, dist))
+        raise PartHtmlError
+
+    # NOTE: if Javascript challenge pages are used to block scrapers, the page
+    # would have to be read with a headless browser (e.g. Ghost) instead.
+
+    try:
+        tree = BeautifulSoup(html, 'lxml')
+    except Exception:
+        logger.log(DEBUG_OBSESSIVE,'No HTML tree for {} from {}'.format(pn, dist))
+        raise PartHtmlError
+
+    # If the tree contains the tag for a product page, then return it.
+    if tree.find('div', class_='product-top-section') is not None:
+
+        # Digikey separates cut-tape and reel packaging, so we need to examine more pages
+        # to get all the pricing info. But don't descend any further if limit has been reached.
+        if descend > 0:
+            try:
+                # Find all the URLs to alternate-packaging pages for this part.
+                packagings = tree.find('div', class_='bota', id='additionalPackaging')
+                ap_urls = [ap.find('li', class_='lnkAltPack').find_all('a')[-1].get('href')
+                           for ap in packagings.find_all('ul', class_='more-expander-item')]
+                logger.log(DEBUG_OBSESSIVE,'Found {} alternate packagings for {} from {}'.format(len(ap_urls), pn, dist))
+                ap_trees_and_urls = []  # Initialize as empty in case no alternate packagings are found.
+                try:
+                    ap_trees_and_urls = [get_part_html_tree(dist, pn,
+                                         extra_search_terms, ap_url, descend=0)
+                                         for ap_url in ap_urls]
+                except Exception:
+                    logger.log(DEBUG_OBSESSIVE,'Failed to find alternate packagings for {} from {}'.format(pn, dist))
+
+                # Put the main tree on the list as well and then look through
+                # the entire list for one that's non-reeled. Use this as the
+                # main page for the part.
+                ap_trees_and_urls.append((tree, url))
+                if part_is_reeled(tree):
+                    for ap_tree, ap_url in ap_trees_and_urls:
+                        if not part_is_reeled(ap_tree):
+                            # Found a non-reeled part, so use it as the main page.
+                            tree = ap_tree
+                            url = ap_url
+                            break  # Done looking.
+
+                # Now go through the other pages, merging their pricing and quantity
+                # info into the main page.
+                for ap_tree, ap_url in ap_trees_and_urls:
+                    if ap_tree is tree:
+                        continue  # Skip examining the main tree. It already contains its info.
+                    try:
+                        # Merge the pricing info from that into the main parse tree to make
+                        # a single, unified set of price tiers...
+                        merge_price_tiers(tree, ap_tree)
+                        # and merge available quantity, using the maximum found.
+                        merge_qty_avail(tree, ap_tree)
+                    except AttributeError:
+                        logger.log(DEBUG_OBSESSIVE,'Problem merging price/qty for {} from {}'.format(pn, dist))
+                        continue
+            except AttributeError:
+                logger.log(DEBUG_OBSESSIVE,'Problem parsing URLs from product page for {} from {}'.format(pn, dist))
+
+        return tree, url  # Return the parse tree and the URL where it came from.
+
+    # If the tree is for a list of products, then examine the links to try to find the part number.
+    if tree.find('table', id='productTable') is not None:
+        logger.log(DEBUG_OBSESSIVE,'Found product table for {} from {}'.format(pn, dist))
+        if descend <= 0:
+            logger.log(DEBUG_OBSESSIVE,'Passed descent limit for {} from {}'.format(pn, dist))
+            raise PartHtmlError
+        else:
+            # Look for the table of products.
+            products = tree.find('table', id='productTable').find('tbody').find_all('tr')
+
+            # Extract the product links for both manufacturer and catalog part
+            # numbers, skipping rows whose cell or link is missing.
+            product_links = []
+            for cell_class in ('tr-mfgPartNumber', 'tr-dkPartNumber'):
+                for p in products:
+                    cell = p.find('td', class_=cell_class)
+                    if cell is not None and cell.a is not None:
+                        product_links.append(cell.a)
+
+            # Extract all the part numbers from the text portion of the links.
+            part_numbers = [l.text for l in product_links]
+
+            # Look for the part number in the list that most closely matches the requested part number.
+            matches = difflib.get_close_matches(pn, part_numbers, 1, 0.0)
+            if not matches:  # No links at all: fail cleanly instead of with an IndexError.
+                logger.log(DEBUG_OBSESSIVE,'No product links for {} from {}'.format(pn, dist))
+                raise PartHtmlError
+            match = matches[0]
+
+            # Now look for the link that goes with the closest matching part number.
+            for l in product_links:
+                if l.text == match:
+                    # Get the tree for the linked-to page and return that.
+                    logger.log(DEBUG_OBSESSIVE,'Selecting {} from product table for {} from {}'.format(l.text, pn, dist))
+                    return get_part_html_tree(dist, pn, extra_search_terms,
+                                              url=l['href'], descend=descend - 1)
+
+    # If the HTML contains a list of part categories, then give up.
+    if tree.find('form', id='keywordSearchForm') is not None:
+        logger.log(DEBUG_OBSESSIVE,'Found high-level part categories for {} from {}'.format(pn, dist))
+        raise PartHtmlError
+
+    # I don't know what happened here, so give up.
+    logger.log(DEBUG_OBSESSIVE,'Unknown error for {} from {}'.format(pn, dist))
+    raise PartHtmlError
diff --git a/kicost/distributors/farnell/__init__.py b/kicost/distributors/farnell/__init__.py
new file mode 100644
index 000000000..ff6eca676
--- /dev/null
+++ b/kicost/distributors/farnell/__init__.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+
+__author__='Giacinto Luigi Cerone'
+
+from .farnell import *
+
+# Place information about this distributor into the distributor dictionary.
+from .. import distributors
+# Register this distributor under the key 'farnell'. Assigning the key
+# directly has the same effect on the shared dictionary as calling update()
+# with a one-entry dictionary.
+distributors['farnell'] = {
+    'module': 'farnell', # The directory name containing this file.
+    'scrape': 'web', # Allowable values: 'web' or 'local'.
+    'label': 'Farnell', # Distributor label used in spreadsheet columns.
+    'order_cols': ['part_num', 'purch', 'refs'], # Sort-order for online orders.
+    'order_delimiter': ' ', # Delimiter for online orders.
+
+    # Formatting for distributor header in worksheet.
+    'wrk_hdr_format': {
+        'font_size': 14,
+        'font_color': 'white',
+        'bold': True,
+        'align': 'center',
+        'valign': 'vcenter',
+        'bg_color': '#FF6600' # Farnell/E14 orange.
+    }
+}
diff --git a/kicost/distributors/farnell/farnell.py b/kicost/distributors/farnell/farnell.py
new file mode 100644
index 000000000..9bee57415
--- /dev/null
+++ b/kicost/distributors/farnell/farnell.py
@@ -0,0 +1,181 @@
+# Inserted by Pasteurize tool.
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from builtins import zip
+from builtins import range
+from builtins import int
+from builtins import str
+from future import standard_library
+standard_library.install_aliases()
+
+import future
+
+import re
+import difflib
+from bs4 import BeautifulSoup
+import http.client # For web scraping exceptions.
+from .. import urlquote, urlsplit, urlunsplit, urlopen, Request
+from .. import HTML_RESPONSE_RETRIES
+from .. import WEB_SCRAPE_EXCEPTIONS
+from .. import FakeBrowser
+from ...kicost import PartHtmlError
+from ...kicost import logger, DEBUG_OVERVIEW, DEBUG_DETAILED, DEBUG_OBSESSIVE
+from currency_converter import CurrencyConverter
+currency = CurrencyConverter()
+
+__author__='Giacinto Luigi Cerone'
+
+
+def get_price_tiers(html_tree):
+    '''Get the pricing tiers from the parsed tree of the farnell product page.
+
+    Returns a dict mapping each break quantity (int) to its unit price; page
+    prices are taken to be in EUR (decimal comma) and converted to USD. The
+    dict is empty when the page has no pricing table.'''
+    price_tiers = {}
+
+    try:
+        table = html_tree.find('table',
+                               class_=('tableProductDetailPrice', 'pricing'))
+        qty_strs = [td.text for td in table.find_all('td', class_='qty')]
+        price_strs = [td.text
+                      for td in table.find_all('td', class_='threeColTd')]
+    except AttributeError:
+        # This happens when no pricing info is found in the tree.
+        return price_tiers  # Return empty price tiers.
+
+    # Quantity and price cells are paired up by position.
+    for qty_str, price_str in zip(qty_strs, price_strs):
+        try:
+            qty = re.search(r'(\s*)([0-9,]+)', qty_str).group(2)
+            qty = int(re.sub(r'[^0-9]', '', qty))
+            # The page uses a decimal comma; float() needs a decimal point.
+            price_str = price_str.replace(',', '.')
+            price = float(re.sub(r'[^0-9.]', '', price_str))
+            # Store the tier only once the conversion has succeeded, so an
+            # unconverted EUR figure is never left behind as a USD price.
+            price_tiers[qty] = currency.convert(price, 'EUR', 'USD')
+        except (TypeError, AttributeError, ValueError):
+            continue
+    return price_tiers
+
+def get_part_num(html_tree):
+    '''Get the part number from the farnell product page.
+
+    Returns the description-list entry labelled 'CodiceProdotto' (presumably the
+    Italian site's label; compared with whitespace removed), or '' if absent.'''
+    try:
+        # The catalog number is stored in a description list, so get all the
+        # list terms and descriptions, strip all the whitespace from those,
+        # and pair them up.
+        div = html_tree.find('div', class_='productDescription').find('dl')
+        dt = [re.sub(r'\s', '', d.text) for d in div.find_all('dt')]
+        dd = [re.sub(r'\s', '', d.text) for d in div.find_all('dd')]
+        return dict(zip(dt, dd))['CodiceProdotto']
+    except (KeyError, AttributeError):
+        # No such entry, or no productDescription list, in the page.
+        return ''
+
+def get_qty_avail(html_tree):
+    '''Get the available quantity of the part from the farnell product page.
+
+    Returns the digits of the 'availabilityHeading' paragraph as an int, or
+    None when that paragraph is missing or contains no digits.'''
+    try:
+        qty_str = html_tree.find('p', class_='availabilityHeading').text
+    except (AttributeError, ValueError):
+        # No quantity found (not even 0) so this is probably a non-stocked part.
+        # Return None so the part won't show in the spreadsheet for this dist.
+        return None
+    try:
+        return int(re.sub('[^0-9]', '', qty_str))  # Strip all non-number chars.
+    except ValueError:
+        return None  # No digits at all: treat like a non-stocked part.
+
+def get_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None):
+    '''Find the farnell HTML page for a part number and return the URL and parse tree.
+
+    Fetches url (or an it.farnell.com search for pn plus extra_search_terms when
+    url is None) and follows at most descend product-list links to a product
+    page. dist is only used for logging and local_part_html is ignored.
+
+    Returns (tree, url); raises PartHtmlError when no product page is found.'''
+    # Use the part number to lookup the part using the site search function, unless a starting url was given.
+    if url is None:
+        url = 'http://it.farnell.com/webapp/wcs/stores/servlet/Search?catalogId=15001&langId=-4&storeId=10165&gs=true&st=' + urlquote(
+            pn + ' ' + extra_search_terms, safe='')
+    elif url.startswith('/'):  # startswith() also copes with an empty url.
+        url = 'http://www.farnell.com' + url
+    elif url.startswith('..'):
+        url = 'http://www.farnell.com/Search/' + url
+
+    # Open the URL, read the HTML from it, and parse it into a tree structure.
+    for _ in range(HTML_RESPONSE_RETRIES):
+        try:
+            req = FakeBrowser(url)
+            response = urlopen(req)
+            html = response.read()
+            break
+        except WEB_SCRAPE_EXCEPTIONS:
+            logger.log(DEBUG_DETAILED,'Exception while web-scraping {} from {}'.format(pn, dist))
+    else: # Couldn't get a good read from the website.
+        logger.log(DEBUG_OBSESSIVE,'No HTML page for {} from {}'.format(pn, dist))
+        raise PartHtmlError
+
+    # Abort if the part number isn't in the HTML somewhere.
+    # (Only use the numbers and letters to compare PN to HTML.)
+    if re.sub(r'[\W_]','',str.lower(pn)) not in re.sub(r'[\W_]','',str.lower(str(html))):
+        logger.log(DEBUG_OBSESSIVE,'No part number {} in HTML page from {}'.format(pn, dist))
+        raise PartHtmlError
+
+    try:
+        tree = BeautifulSoup(html, 'lxml')
+    except Exception:
+        logger.log(DEBUG_OBSESSIVE,'No HTML tree for {} from {}'.format(pn, dist))
+        raise PartHtmlError
+
+    # If the tree contains the tag for a product page, then just return it.
+    if tree.find('div', class_='productDisplay', id='page') is not None:
+        return tree, url
+
+    # If the tree is for a list of products, then examine the links to try to find the part number.
+    if tree.find('table', class_='productLister', id='sProdList') is not None:
+        logger.log(DEBUG_OBSESSIVE,'Found product table for {} from {}'.format(pn, dist))
+        if descend <= 0:
+            logger.log(DEBUG_OBSESSIVE,'Passed descent limit for {} from {}'.format(pn, dist))
+            raise PartHtmlError
+        else:
+            # Look for the table of products.
+            products = tree.find('table', class_='productLister', id='sProdList').find_all('tr', class_='altRow')
+
+            # Extract the product links for the part numbers, skipping rows without one.
+            product_links = []
+            for p in products:
+                cell = p.find('td', class_='mftrPart')
+                link = cell.find('a') if cell is not None else None
+                if link is not None:
+                    product_links.append(link)
+
+            # Extract all the part numbers from the text portion of the links.
+            part_numbers = [l.text for l in product_links]
+
+            # Look for the part number in the list that most closely matches the requested part number.
+            matches = difflib.get_close_matches(pn, part_numbers, 1, 0.0)
+            if not matches:  # No links at all: fail cleanly instead of with an IndexError.
+                logger.log(DEBUG_OBSESSIVE,'No product links for {} from {}'.format(pn, dist))
+                raise PartHtmlError
+            match = matches[0]
+
+            # Now look for the link that goes with the closest matching part number.
+            for l in product_links:
+                if l.text == match:
+                    # Get the tree for the linked-to page and return that.
+                    logger.log(DEBUG_OBSESSIVE,'Selecting {} from product table for {} from {}'.format(l.text, pn, dist))
+                    return get_part_html_tree(dist, pn, extra_search_terms,
+                                              url=l['href'], descend=descend-1)
+
+    # I don't know what happened here, so give up.
+    logger.log(DEBUG_OBSESSIVE,'Unknown error for {} from {}'.format(pn, dist))
+    raise PartHtmlError
diff --git a/kicost/distributors/local/__init__.py b/kicost/distributors/local/__init__.py
new file mode 100644
index 000000000..0525c1b1c
--- /dev/null
+++ b/kicost/distributors/local/__init__.py
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+
+__author__ = 'XESS Corporation'
+__email__ = 'info@xess.com'
+
+from .local import *
+
+# Place information about this distributor into the distributor dictionary.
+from .. import distributors
+# Register this distributor under the key 'local_template'. Assigning the key
+# directly has the same effect on the shared dictionary as calling update()
+# with a one-entry dictionary.
+distributors['local_template'] = {
+    'module': 'local', # The directory name containing this file.
+    'scrape': 'local', # Allowable values: 'web' or 'local'.
+    'label': 'Local', # Distributor label used in spreadsheet columns.
+    'order_cols': ['part_num', 'purch', 'refs'], # Sort-order for online orders.
+    'order_delimiter': ' ', # Delimiter for online orders.
+
+    # Formatting for distributor header in worksheet.
+    'wrk_hdr_format': {
+        'font_size': 14,
+        'font_color': 'white',
+        'bold': True,
+        'align': 'center',
+        'valign': 'vcenter',
+        'bg_color': '#008000' # Darker green.
+    }
+}
diff --git a/kicost/distributors/local/local.py b/kicost/distributors/local/local.py
new file mode 100644
index 000000000..5d7add0f5
--- /dev/null
+++ b/kicost/distributors/local/local.py
@@ -0,0 +1,114 @@
+# MIT license
+#
+# Copyright (C) 2015 by XESS Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+# Inserted by Pasteurize tool.
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from builtins import zip
+from builtins import range
+from builtins import int
+from builtins import str
+from future import standard_library
+standard_library.install_aliases()
+
+import future
+
+import re
+import difflib
+from bs4 import BeautifulSoup
+import http.client # For web scraping exceptions.
+from .. import urlquote, urlsplit, urlunsplit, urlopen, Request
+from .. import HTML_RESPONSE_RETRIES
+from .. import WEB_SCRAPE_EXCEPTIONS
+from .. import FakeBrowser
+from ...kicost import PartHtmlError
+from ...kicost import logger, DEBUG_OVERVIEW, DEBUG_DETAILED, DEBUG_OBSESSIVE
+from ...kicost import SEPRTR
+
+
+def get_price_tiers(html_tree):
+    '''Return {quantity (int): price (float)} from the local part's 'pricing' div, whose
+    text holds ';'-separated pairs with SEPRTR between quantity and price.'''
+    price_tiers = {}
+    try:  # The regex keeps only digits, decimals, delimiters.
+        pricing = re.sub('[^0-9.;:]', '', html_tree.find('div', class_='pricing').text)
+    except AttributeError:  # This happens when no pricing info is found in the tree.
+        logger.log(DEBUG_OBSESSIVE, 'No local pricing information found!')
+        return price_tiers  # Return empty price tiers.
+    for qty_price in pricing.split(';'):
+        qty, _, price = qty_price.partition(SEPRTR)
+        if qty and price:  # Skip empty or incomplete pairs (e.g. after a trailing ';').
+            price_tiers[int(qty)] = float(price)
+    return price_tiers
+
+
+def get_part_num(html_tree):
+    '''Return the text of the local part's 'cat#' div, or '' when there is none.'''
+    try:
+        cat_div = html_tree.find('div', class_='cat#')
+        return cat_div.text
+    except AttributeError:
+        return ''
+
+
+def get_qty_avail(html_tree):
+    '''Get the available quantity of the part from the local product page.
+    Always returns an int: 0 (rather than None) stands in for a missing or
+    digit-free 'quantity' div so the part still shows in the spreadsheet.'''
+    try:
+        quantity_div = html_tree.find('div', class_='quantity')
+        raw_text = quantity_div.text
+    except (AttributeError, ValueError):
+        return 0
+    digits = re.sub('[^0-9]', '', raw_text)
+    try:
+        return int(digits)
+    except ValueError:
+        logger.log(DEBUG_OBSESSIVE, 'No local part quantity found!')
+        return 0
+
+
+def get_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=None, local_part_html=None):
+    '''Extract the HTML tree for one local part from the local-parts HTML page.
+
+    Returns (part_tree, link): the DIV whose class is dist + SEPRTR + pn and the
+    stripped text of its 'link' DIV (None if absent). Raises PartHtmlError when
+    the page can't be parsed or that DIV is missing. Other arguments are unused.'''
+    try:
+        tree = BeautifulSoup(local_part_html, 'lxml')
+    except Exception:
+        raise PartHtmlError
+
+    # Find the DIV for the given part and distributor, then its link DIV.
+    try:
+        part_tree = tree.find('div', class_=dist + SEPRTR + pn)
+        url_tree = part_tree.find('div', class_='link')
+    except AttributeError:
+        raise PartHtmlError  # The part_tree was not found.
+
+    try:
+        link = url_tree.text.strip()
+    except AttributeError:
+        link = None  # No URL is associated with the part.
+    return part_tree, link
diff --git a/kicost/distributors/mouser/__init__.py b/kicost/distributors/mouser/__init__.py
new file mode 100644
index 000000000..741de6fc3
--- /dev/null
+++ b/kicost/distributors/mouser/__init__.py
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+
+__author__ = 'XESS Corporation'
+__email__ = 'info@xess.com'
+
+from .mouser import *
+
+# Place information about this distributor into the distributor dictionary.
+from .. import distributors
+# Register this distributor under the key 'mouser'. Assigning the key
+# directly has the same effect on the shared dictionary as calling update()
+# with a one-entry dictionary.
+distributors['mouser'] = {
+    'module': 'mouser', # The directory name containing this file.
+    'scrape': 'web', # Allowable values: 'web' or 'local'.
+    'label': 'Mouser', # Distributor label used in spreadsheet columns.
+    'order_cols': ['part_num', 'purch', 'refs'], # Sort-order for online orders.
+    'order_delimiter': ' ', # Delimiter for online orders.
+
+    # Formatting for distributor header in worksheet.
+    'wrk_hdr_format': {
+        'font_size': 14,
+        'font_color': 'white',
+        'bold': True,
+        'align': 'center',
+        'valign': 'vcenter',
+        'bg_color': '#004A85' # Mouser blue.
+    }
+}
diff --git a/kicost/distributors/mouser/mouser.py b/kicost/distributors/mouser/mouser.py
new file mode 100644
index 000000000..3bb4f656c
--- /dev/null
+++ b/kicost/distributors/mouser/mouser.py
@@ -0,0 +1,191 @@
+# MIT license
+#
+# Copyright (C) 2015 by XESS Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+# Inserted by Pasteurize tool.
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from builtins import zip
+from builtins import range
+from builtins import int
+from builtins import str
+from future import standard_library
+standard_library.install_aliases()
+
+import future
+
+import re
+import difflib
+from bs4 import BeautifulSoup
+import http.client # For web scraping exceptions.
+from .. import urlquote, urlsplit, urlunsplit, urlopen, Request
+from .. import HTML_RESPONSE_RETRIES
+from .. import WEB_SCRAPE_EXCEPTIONS
+from .. import FakeBrowser
+from ...kicost import PartHtmlError
+from ...kicost import logger, DEBUG_OVERVIEW, DEBUG_DETAILED, DEBUG_OBSESSIVE
+
+
+def get_price_tiers(html_tree):
+    '''Get the pricing tiers from the parsed tree of the Mouser product page.
+
+    Args:
+        html_tree: Parsed tree of a Mouser product page.
+
+    Returns:
+        Dict mapping each price-break quantity (int) to its unit price (float).
+        The dict is empty when no pricing information is found.
+    '''
+    price_tiers = {}
+    try:
+        # Look the price-break container up once. find() returns None when it
+        # is missing, which makes the find_all() calls raise AttributeError.
+        price_breaks = html_tree.find('div', class_='PriceBreaks')
+        qty_strs = [qty.text for qty in
+                    price_breaks.find_all('div', class_='PriceBreakQuantity')]
+        price_strs = [price.text for price in
+                      price_breaks.find_all('div', class_='PriceBreakPrice')]
+    except AttributeError:
+        # This happens when no pricing info is found in the tree.
+        logger.log(DEBUG_OBSESSIVE, 'No Mouser pricing information found!')
+        return price_tiers  # Return empty price tiers.
+
+    for qty_str, price_str in zip(qty_strs, price_strs):
+        try:
+            # Raw strings keep the regex escapes from being treated as
+            # (invalid) string escape sequences.
+            qty = re.search(r'(\s*)([0-9,]+)', qty_str).group(2)
+            qty = int(re.sub(r'[^0-9]', '', qty))
+            price_tiers[qty] = float(re.sub(r'[^0-9\.]', '', price_str))
+        except (TypeError, AttributeError, ValueError, IndexError):
+            # Skip rows whose quantity or price can't be parsed.
+            continue
+    return price_tiers
+
+
+def get_part_num(html_tree):
+    '''Return the Mouser part number found in the product page tree.
+
+    The text of the 'divMouserPartNum' DIV is returned with newlines removed.
+    An empty string is returned if that DIV is not in the tree.
+    '''
+    try:
+        raw_text = html_tree.find('div', id='divMouserPartNum').text
+    except AttributeError:
+        # find() returned None: the page has no part number DIV.
+        logger.log(DEBUG_OBSESSIVE, 'No Mouser part number found!')
+        return ''
+    return re.sub('\n', '', raw_text)
+
+
+def get_qty_avail(html_tree):
+    '''Get the available quantity of the part from the Mouser product page.
+
+    Args:
+        html_tree: Parsed tree of a Mouser product page.
+
+    Returns:
+        The available quantity as an int, or None if no quantity is found
+        (so the part won't show in the spreadsheet for this distributor).
+    '''
+    try:
+        availability = html_tree.find('div', id='availability')
+        row = availability.find('div', class_='av-row')
+        qty_str = row.find('div', class_='av-col2').text
+    except AttributeError:
+        # No quantity found (not even 0) so this is probably a non-stocked part.
+        # Return None so the part won't show in the spreadsheet for this dist.
+        logger.log(DEBUG_OBSESSIVE, 'No Mouser part quantity found!')
+        return None
+    try:
+        # Every part of the pattern is optional, so search() always matches;
+        # an empty capture makes int() raise ValueError below.
+        qty_str = re.search(r'(\s*)([0-9,]*)', qty_str).group(2)
+        return int(re.sub(r'[^0-9]', '', qty_str))
+    except ValueError:
+        # No quantity found (not even 0) so this is probably a non-stocked part.
+        # Return None so the part won't show in the spreadsheet for this dist.
+        logger.log(DEBUG_OBSESSIVE, 'No Mouser part quantity found!')
+        return None
+
+
+def get_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None):
+    '''Find the Mouser HTML page for a part number and return the parse tree and URL.
+
+    Args:
+        dist: Distributor name (used in log messages).
+        pn: Part number to search for.
+        extra_search_terms: Extra text appended to the search keyword.
+        url: Starting URL; if None, a site search URL is built from pn.
+        descend: How many more times a product list may be followed to a linked page.
+        local_part_html: Not used by this function.
+
+    Returns:
+        Tuple (tree, url) for the product page.
+
+    Raises:
+        PartHtmlError: If the page can't be read or parsed, doesn't mention
+            the part number, or no product page can be reached.
+    '''
+
+    # Use the part number to lookup the part using the site search function, unless a starting url was given.
+    if url is None:
+        url = 'http://www.mouser.com/Search/Refine.aspx?Keyword=' + urlquote(
+            pn + ' ' + extra_search_terms,
+            safe='')
+    elif url[0] == '/':
+        url = 'http://www.mouser.com' + url
+    elif url.startswith('..'):
+        url = 'http://www.mouser.com/Search/' + url
+
+    # Open the URL and read the HTML from it, retrying on scraping errors.
+    req = FakeBrowser(url)
+    req.add_header('Cookie', 'preferences=ps=www2&pl=en-US&pc_www2=USDe')
+    for _ in range(HTML_RESPONSE_RETRIES):
+        try:
+            response = urlopen(req)
+            html = response.read()
+            break
+        except WEB_SCRAPE_EXCEPTIONS:
+            logger.log(DEBUG_DETAILED,'Exception while web-scraping {} from {}'.format(pn, dist))
+    else: # Couldn't get a good read from the website.
+        logger.log(DEBUG_OBSESSIVE,'No HTML page for {} from {}'.format(pn, dist))
+        raise PartHtmlError
+
+    # Abort if the part number isn't in the HTML somewhere.
+    # (Only use the numbers and letters to compare PN to HTML.)
+    if re.sub(r'[\W_]','',str.lower(pn)) not in re.sub(r'[\W_]','',str.lower(str(html))):
+        logger.log(DEBUG_OBSESSIVE,'No part number {} in HTML page from {}'.format(pn, dist))
+        raise PartHtmlError
+
+    try:
+        tree = BeautifulSoup(html, 'lxml')
+    except Exception:
+        logger.log(DEBUG_OBSESSIVE,'No HTML tree for {} from {}'.format(pn, dist))
+        raise PartHtmlError
+
+    # If the tree contains the tag for a product page, then just return it.
+    if tree.find('div', id='product-details') is not None:
+        return tree, url
+
+    # If the tree is for a list of products, then examine the links to try to find the part number.
+    results_table = tree.find('table', class_='SearchResultsTable')
+    if results_table is not None:
+        logger.log(DEBUG_OBSESSIVE,'Found product table for {} from {}'.format(pn, dist))
+        if descend <= 0:
+            logger.log(DEBUG_OBSESSIVE,'Passed descent limit for {} from {}'.format(pn, dist))
+            raise PartHtmlError
+
+        # Look for the rows in the table of products.
+        products = results_table.find_all(
+            'tr',
+            class_=('SearchResultsRowOdd', 'SearchResultsRowEven'))
+
+        # Extract the product links for the part numbers from the table,
+        # skipping rows that have no manufacturer DIV or no link in it.
+        product_links = []
+        for p in products:
+            try:
+                link = p.find('div', class_='mfrDiv').a
+            except AttributeError:
+                continue
+            if link is not None:
+                product_links.append(link)
+
+        # Extract all the part numbers from the text portion of the links.
+        part_numbers = [l.text for l in product_links]
+
+        # Look for the part number in the list that most closely matches the requested part number.
+        try:
+            match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]
+        except IndexError:
+            # No candidate part numbers were found in the table.
+            logger.log(DEBUG_OBSESSIVE,'No product links for {} from {}'.format(pn, dist))
+            raise PartHtmlError
+
+        # Now look for the link that goes with the closest matching part number.
+        for l in product_links:
+            if l.text == match:
+                # Get the tree for the linked-to page and return that.
+                logger.log(DEBUG_OBSESSIVE,'Selecting {} from product table for {} from {}'.format(l.text, pn, dist))
+                return get_part_html_tree(dist, pn, extra_search_terms,
+                                          url=l['href'], descend=descend-1)
+
+    # I don't know what happened here, so give up.
+    logger.log(DEBUG_OBSESSIVE,'Unknown error for {} from {}'.format(pn, dist))
+    raise PartHtmlError
diff --git a/kicost/distributors/newark/__init__.py b/kicost/distributors/newark/__init__.py
new file mode 100644
index 000000000..2ee262c9c
--- /dev/null
+++ b/kicost/distributors/newark/__init__.py
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+
+__author__ = 'XESS Corporation'
+__email__ = 'info@xess.com'
+
+from .newark import *
+
+# Place information about this distributor into the distributor dictionary.
+# `distributors` is imported from the parent package; the update() call below
+# adds (or replaces) the 'newark' entry in it.
+from .. import distributors
+distributors.update(
+ {
+ 'newark': {
+ 'module': 'newark', # The directory name containing this file.
+ 'scrape': 'web', # Allowable values: 'web' or 'local'.
+ 'label': 'Newark', # Distributor label used in spreadsheet columns.
+ 'order_cols': ['part_num', 'purch', 'refs'], # Sort-order for online orders.
+ 'order_delimiter': ',', # Delimiter for online orders.
+ # Formatting for distributor header in worksheet.
+ 'wrk_hdr_format': {
+ 'font_size': 14,
+ 'font_color': 'white',
+ 'bold': True,
+ 'align': 'center',
+ 'valign': 'vcenter',
+ 'bg_color': '#A2AE06' # Newark/E14 olive green.
+ }
+ }
+ }
+)
diff --git a/kicost/distributors/newark/newark.py b/kicost/distributors/newark/newark.py
new file mode 100644
index 000000000..6090eca58
--- /dev/null
+++ b/kicost/distributors/newark/newark.py
@@ -0,0 +1,202 @@
+# MIT license
+#
+# Copyright (C) 2015 by XESS Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+# Inserted by Pasteurize tool.
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from builtins import zip
+from builtins import range
+from builtins import int
+from builtins import str
+from future import standard_library
+standard_library.install_aliases()
+
+import future
+
+import re
+import difflib
+from bs4 import BeautifulSoup
+import http.client # For web scraping exceptions.
+from .. import urlquote, urlsplit, urlunsplit, urlopen, Request
+from .. import HTML_RESPONSE_RETRIES
+from .. import WEB_SCRAPE_EXCEPTIONS
+from .. import FakeBrowser
+from ...kicost import PartHtmlError
+from ...kicost import logger, DEBUG_OVERVIEW, DEBUG_DETAILED, DEBUG_OBSESSIVE
+
+
+def get_price_tiers(html_tree):
+    '''Get the pricing tiers from the parsed tree of the Newark product page.
+
+    Args:
+        html_tree: Parsed tree of a Newark product page.
+
+    Returns:
+        Dict mapping each price-break quantity (int) to its unit price (float).
+        The dict is empty when no pricing information is found.
+    '''
+    price_tiers = {}
+    try:
+        # Look the pricing table up once. find() returns None when it is
+        # missing, which makes the find_all() calls raise AttributeError.
+        price_table = html_tree.find(
+            'table',
+            class_=('tableProductDetailPrice', 'pricing'))
+        qty_strs = [qty.text for qty in
+                    price_table.find_all('td', class_='qty')]
+        price_strs = [price.text for price in
+                      price_table.find_all('td', class_='threeColTd')]
+    except AttributeError:
+        # This happens when no pricing info is found in the tree.
+        logger.log(DEBUG_OBSESSIVE, 'No Newark pricing information found!')
+        return price_tiers  # Return empty price tiers.
+
+    for qty_str, price_str in zip(qty_strs, price_strs):
+        try:
+            # Raw strings keep the regex escapes from being treated as
+            # (invalid) string escape sequences.
+            qty = re.search(r'(\s*)([0-9,]+)', qty_str).group(2)
+            qty = int(re.sub(r'[^0-9]', '', qty))
+            price_tiers[qty] = float(re.sub(r'[^0-9\.]', '', price_str))
+        except (TypeError, AttributeError, ValueError):
+            # Skip rows whose quantity or price can't be parsed.
+            continue
+    return price_tiers
+
+
+def get_part_num(html_tree):
+    '''Get the part number from the Newark product page.
+
+    Args:
+        html_tree: Parsed tree of a Newark product page.
+
+    Returns:
+        The Newark catalog number with all whitespace removed, or '' if the
+        product description or the catalog number entry is missing.
+    '''
+    try:
+        # Newark catalog number is stored in a description list, so get
+        # all the list terms and descriptions, strip all the spaces from those,
+        # and pair them up.
+        div = html_tree.find('div', class_='productDescription').find('dl')
+        dt = [re.sub(r'\s', '', d.text) for d in div.find_all('dt')]
+        dd = [re.sub(r'\s', '', d.text) for d in div.find_all('dd')]
+        dtdd = dict(zip(dt, dd))  # Pair terms with descriptions.
+        return dtdd['NewarkPartNo.:']
+    except KeyError:
+        logger.log(DEBUG_OBSESSIVE, 'No Newark catalog number found!')
+        return ''  # No catalog number found in page.
+    except AttributeError:
+        logger.log(DEBUG_OBSESSIVE, 'No Newark product description found!')
+        return ''  # No ProductDescription found in page.
+
+
+def get_qty_avail(html_tree):
+    '''Get the available quantity of the part from the Newark product page.
+
+    Args:
+        html_tree: Parsed tree of a Newark product page.
+
+    Returns:
+        The available quantity as an int, or None if no quantity is found
+        (so the part won't show in the spreadsheet for this distributor).
+    '''
+    try:
+        qty_str = html_tree.find('p', class_='availabilityHeading').text
+    except (AttributeError, ValueError):
+        # No quantity found (not even 0) so this is probably a non-stocked part.
+        # Return None so the part won't show in the spreadsheet for this dist.
+        logger.log(DEBUG_OBSESSIVE, 'No Newark part quantity found!')
+        return None
+    try:
+        # Strip all non-number chars and return an integer for the quantity.
+        # int('') raises ValueError when the heading contains no digits.
+        return int(re.sub('[^0-9]', '', qty_str))
+    except ValueError:
+        # No quantity found (not even 0) so this is probably a non-stocked part.
+        # Return None so the part won't show in the spreadsheet for this dist.
+        logger.log(DEBUG_OBSESSIVE, 'No Newark part quantity found!')
+        return None
+
+
+def get_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None):
+    '''Find the Newark HTML page for a part number and return the parse tree and URL.
+
+    Args:
+        dist: Distributor name (used in log messages).
+        pn: Part number to search for.
+        extra_search_terms: Extra text appended to the search term.
+        url: Starting URL; if None, a site search URL is built from pn.
+        descend: How many more times a product list may be followed to a linked page.
+        local_part_html: Not used by this function.
+
+    Returns:
+        Tuple (tree, url) for the product page.
+
+    Raises:
+        PartHtmlError: If the page can't be read or parsed, doesn't mention
+            the part number, or no product page can be reached.
+    '''
+
+    # Use the part number to lookup the part using the site search function, unless a starting url was given.
+    if url is None:
+        url = 'http://www.newark.com/webapp/wcs/stores/servlet/Search?catalogId=15003&langId=-1&storeId=10194&gs=true&st=' + urlquote(
+            pn + ' ' + extra_search_terms,
+            safe='')
+    elif url[0] == '/':
+        url = 'http://www.newark.com' + url
+    elif url.startswith('..'):
+        url = 'http://www.newark.com/Search/' + url
+
+    # Open the URL and read the HTML from it, retrying on scraping errors.
+    for _ in range(HTML_RESPONSE_RETRIES):
+        try:
+            req = FakeBrowser(url)
+            response = urlopen(req)
+            html = response.read()
+            break
+        except WEB_SCRAPE_EXCEPTIONS:
+            logger.log(DEBUG_DETAILED,'Exception while web-scraping {} from {}'.format(pn, dist))
+    else: # Couldn't get a good read from the website.
+        logger.log(DEBUG_OBSESSIVE,'No HTML page for {} from {}'.format(pn, dist))
+        raise PartHtmlError
+
+    try:
+        tree = BeautifulSoup(html, 'lxml')
+    except Exception:
+        logger.log(DEBUG_OBSESSIVE,'No HTML tree for {} from {}'.format(pn, dist))
+        raise PartHtmlError
+
+    # Abort if the part number isn't in the HTML somewhere.
+    # (Only use the numbers and letters to compare PN to HTML.)
+    if re.sub(r'[\W_]','',str.lower(pn)) not in re.sub(r'[\W_]','',str.lower(str(html))):
+        logger.log(DEBUG_OBSESSIVE,'No part number {} in HTML page from {}'.format(pn, dist))
+        raise PartHtmlError
+
+    # If the tree contains the tag for a product page, then just return it.
+    if tree.find('div', class_='productDisplay', id='page') is not None:
+        return tree, url
+
+    # If the tree is for a list of products, then examine the links to try to find the part number.
+    product_table = tree.find('table', class_='productLister', id='sProdList')
+    if product_table is not None:
+        logger.log(DEBUG_OBSESSIVE,'Found product table for {} from {}'.format(pn, dist))
+        if descend <= 0:
+            logger.log(DEBUG_OBSESSIVE,'Passed descent limit for {} from {}'.format(pn, dist))
+            raise PartHtmlError
+
+        # Look for the rows in the table of products.
+        try:
+            products = product_table.find('tbody').find_all('tr')
+        except AttributeError:
+            # The product table has no body to take rows from.
+            logger.log(DEBUG_OBSESSIVE,'No product rows for {} from {}'.format(pn, dist))
+            raise PartHtmlError
+
+        # Extract the product links for the part numbers from the table,
+        # skipping rows that have no part cell or no link inside it.
+        product_links = []
+        for p in products:
+            try:
+                link = p.find('td', class_='mftrPart').find('a')
+            except AttributeError:
+                continue
+            if link is not None:
+                product_links.append(link)
+
+        # Extract all the part numbers from the text portion of the links.
+        part_numbers = [l.text for l in product_links]
+
+        # Look for the part number in the list that most closely matches the requested part number.
+        try:
+            match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]
+        except IndexError:
+            raise PartHtmlError
+
+        # Now look for the link that goes with the closest matching part number.
+        for l in product_links:
+            if l.text == match:
+                # Get the tree for the linked-to page and return that.
+                logger.log(DEBUG_OBSESSIVE,'Selecting {} from product table for {} from {}'.format(l.text, pn, dist))
+                return get_part_html_tree(dist, pn, extra_search_terms,
+                                          url=l['href'], descend=descend-1)
+
+    # I don't know what happened here, so give up.
+    logger.log(DEBUG_OBSESSIVE,'Unknown error for {} from {}'.format(pn, dist))
+    raise PartHtmlError
diff --git a/kicost/distributors/rs/__init__.py b/kicost/distributors/rs/__init__.py
new file mode 100644
index 000000000..7ee749adb
--- /dev/null
+++ b/kicost/distributors/rs/__init__.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+
+__author__='Giacinto Luigi Cerone'
+
+from .rs import *
+
+# Place information about this distributor into the distributor dictionary.
+# `distributors` is imported from the parent package; the update() call below
+# adds (or replaces) the 'rs' entry in it.
+from .. import distributors
+distributors.update(
+ {
+ 'rs': {
+ 'module': 'rs', # The directory name containing this file.
+ 'scrape': 'web', # Allowable values: 'web' or 'local'.
+ 'label': 'RS Components', # Distributor label used in spreadsheet columns.
+ 'order_cols': ['part_num', 'purch', 'refs'], # Sort-order for online orders.
+ 'order_delimiter': ' ', # Delimiter for online orders.
+ # Formatting for distributor header in worksheet.
+ 'wrk_hdr_format': {
+ 'font_size': 14,
+ 'font_color': 'white',
+ 'bold': True,
+ 'align': 'center',
+ 'valign': 'vcenter',
+ 'bg_color': '#FF0000' # RS Components red.
+ }
+ }
+ }
+)
diff --git a/kicost/distributors/rs/rs.py b/kicost/distributors/rs/rs.py
new file mode 100644
index 000000000..1d8366e0c
--- /dev/null
+++ b/kicost/distributors/rs/rs.py
@@ -0,0 +1,209 @@
+# Inserted by Pasteurize tool.
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from builtins import zip
+from builtins import range
+from builtins import int
+from builtins import str
+from future import standard_library
+standard_library.install_aliases()
+
+import future
+
+import re
+import difflib
+from bs4 import BeautifulSoup
+import http.client # For web scraping exceptions.
+from .. import urlquote, urlsplit, urlunsplit, urlopen, Request
+from .. import HTML_RESPONSE_RETRIES
+from .. import WEB_SCRAPE_EXCEPTIONS
+from .. import FakeBrowser
+from ...kicost import PartHtmlError
+from ...kicost import logger, DEBUG_OVERVIEW, DEBUG_DETAILED, DEBUG_OBSESSIVE
+from currency_converter import CurrencyConverter
+# Module-level converter shared by get_price_tiers(), which uses it to
+# convert the scraped EUR prices into USD.
+currency = CurrencyConverter()
+
+
+def get_price_tiers(html_tree):
+    '''Get the pricing tiers from the parsed tree of the RS Components product page.
+
+    Args:
+        html_tree: Parsed tree of an RS Components product page.
+
+    Returns:
+        Dict mapping each price-break quantity (int) to its unit price
+        (float), converted from EUR to USD with the module-level `currency`
+        converter. The dict is empty when no pricing information is found.
+    '''
+    price_tiers = {}
+
+    try:
+        qty_strs = [
+            qty.text for qty in
+            html_tree.find_all('div', class_='breakRangeWithoutUnit', itemprop='eligibleQuantity')]
+        # Skip empty price cells. Compare by value: an identity test against
+        # a string literal only works by accident of interning.
+        price_strs = [
+            price.text for price in
+            html_tree.find_all('div', class_='unitPrice')
+            if price.text != '']
+    except AttributeError:
+        # This happens when no pricing info is found in the tree.
+        return price_tiers  # Return empty price tiers.
+
+    for qty_str, price_str in zip(qty_strs, price_strs):
+        try:
+            qty = re.search(r'(\s*)([0-9,]+)', qty_str).group(2)
+            qty = int(re.sub(r'[^0-9]', '', qty))
+            # The page uses a comma as the decimal separator.
+            price_str = price_str.replace(',', '.')
+            price = float(re.sub(r'[^0-9\.]', '', price_str))
+            price_tiers[qty] = price
+            price_tiers[qty] = currency.convert(price, 'EUR', 'USD')
+        except (TypeError, AttributeError, ValueError):
+            # Skip rows whose quantity or price can't be parsed.
+            continue
+    return price_tiers
+
+def get_part_num(html_tree):
+    '''Get the part number from the RS Components product page.
+
+    Args:
+        html_tree: Parsed tree of an RS Components product page.
+
+    Returns:
+        The text of the 'sku' span reduced to its digits and hyphens, or ''
+        if that span is not in the tree.
+    '''
+    try:
+        pn_str = html_tree.find('span', class_='keyValue bold', itemprop='sku').text
+        # Keep only the digits and hyphens of the stock number.
+        return re.sub(r'[^0-9\-]', '', pn_str)
+    except (KeyError, AttributeError):
+        return ''  # No catalog number found in page.
+
+def get_qty_avail(html_tree):
+    '''Get the available quantity of the part from the RS Components product page.
+
+    Only the digits within the first 10 characters of the stock message are
+    used. None is returned if the stock message DIV is missing or those
+    characters contain no digits.
+    '''
+    try:
+        stock_msg = html_tree.find('div', class_='floatLeft stockMessaging availMessageDiv bottom5').text
+    except (AttributeError, ValueError):
+        # No quantity found (not even 0) so this is probably a non-stocked part.
+        # Return None so the part won't show in the spreadsheet for this dist.
+        return None
+
+    # Strip all non-number chars from the start of the message.
+    digits = re.sub('[^0-9]', '', stock_msg[0:10])
+    try:
+        return int(digits)  # Return integer for quantity.
+    except ValueError:
+        # No digits at all, so treat this as a non-stocked part too.
+        return None
+
+def get_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None):
+    '''Find the RS Components HTML page for a part number and return the parse tree and URL.
+
+    Args:
+        dist: Distributor name (used in log messages).
+        pn: Part number to search for.
+        extra_search_terms: Extra text appended to the search term.
+        url: Starting URL; if None, a site search URL is built from pn.
+        descend: How many more times a product list may be followed to a linked page.
+        local_part_html: Not used by this function.
+
+    Returns:
+        Tuple (tree, url) for the product page.
+
+    Raises:
+        PartHtmlError: If the page can't be read or parsed, doesn't mention
+            the part number, or no product page can be reached.
+    '''
+
+    # Use the part number to lookup the part using the site search function, unless a starting url was given.
+    if url is None:
+        url = 'http://it.rs-online.com/web/c/?searchTerm=' + urlquote(pn + ' ' + extra_search_terms, safe='')
+    elif url[0] == '/':
+        url = 'http://it.rs-online.com' + url
+    elif url.startswith('..'):
+        url = 'http://it.rs-online.com/Search/' + url
+
+    # Open the URL and read the HTML from it, retrying on scraping errors.
+    for _ in range(HTML_RESPONSE_RETRIES):
+        try:
+            req = FakeBrowser(url)
+            response = urlopen(req)
+            html = response.read()
+            break
+        except WEB_SCRAPE_EXCEPTIONS:
+            logger.log(DEBUG_DETAILED,'Exception while web-scraping {} from {}'.format(pn, dist))
+    else: # Couldn't get a good read from the website.
+        logger.log(DEBUG_OBSESSIVE,'No HTML page for {} from {}'.format(pn, dist))
+        raise PartHtmlError
+
+    try:
+        tree = BeautifulSoup(html, 'lxml')
+    except Exception:
+        logger.log(DEBUG_OBSESSIVE,'No HTML tree for {} from {}'.format(pn, dist))
+        raise PartHtmlError
+
+    # Abort if the part number isn't in the HTML somewhere.
+    # (Only use the numbers and letters to compare PN to HTML.)
+    if re.sub(r'[\W_]','',str.lower(pn)) not in re.sub(r'[\W_]','',str.lower(str(html))):
+        logger.log(DEBUG_OBSESSIVE,'No part number {} in HTML page from {}'.format(pn, dist))
+        raise PartHtmlError
+
+    # If the tree contains the tag for a product page, then just return it.
+    if tree.find('div', class_='specTableContainer') is not None:
+        return tree, url
+
+    # If the tree is for a list of products, then follow a link from it.
+    if tree.find('div', class_='srtnPageContainer') is not None:
+        logger.log(DEBUG_OBSESSIVE,'Found product table for {} from {}'.format(pn, dist))
+        if descend <= 0:
+            logger.log(DEBUG_OBSESSIVE,'Passed descent limit for {} from {}'.format(pn, dist))
+            raise PartHtmlError
+
+        # Follow the first result row that has a product link, i.e. do not
+        # choose the link based on the stock type (e.g. single unit, reel etc.)
+        # and do not try to find the closest matching part number.
+        for p in tree.find_all('tr', class_='resultRow'):
+            try:
+                link = p.find('a', class_='primarySearchLink')['href']
+                return get_part_html_tree(dist, pn, extra_search_terms,
+                                          url=link, descend=descend-1)
+            except (AttributeError, TypeError, KeyError):
+                # Row without a usable link (no anchor or no href): try the next row.
+                continue
+
+    # I don't know what happened here, so give up.
+    logger.log(DEBUG_OBSESSIVE,'Unknown error for {} from {}'.format(pn, dist))
+    raise PartHtmlError
+
+if __name__ == '__main__':
+    # Manual check: scrape a single part from RS Components and print the
+    # price tiers, available quantity and part number parsed from its page.
+    # Other part numbers tried here: MSP430F5438AIPZ, CC3200-LAUNCHXL, LM358PW.
+    html_tree = get_part_html_tree(dist='rs', pn='MCP1252-33X50I/MS')
+    page_tree = html_tree[0]
+
+    pt = get_price_tiers(page_tree)
+    qt = get_qty_avail(page_tree)
+    pn = get_part_num(page_tree)
+
+    separator = '****************'
+    print(separator)
+    for parsed in (pt, qt, pn):
+        print(parsed)
+        print(separator)
+
+
diff --git a/kicost/distributors/tme/__init__.py b/kicost/distributors/tme/__init__.py
new file mode 100644
index 000000000..c20b95fac
--- /dev/null
+++ b/kicost/distributors/tme/__init__.py
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+
+__author__ ='Adam Heinrich'
+__email__ = 'adam@adamh.cz'
+
+from .tme import *
+
+# Place information about this distributor into the distributor dictionary.
+# `distributors` is imported from the parent package; the update() call below
+# adds (or replaces) the 'tme' entry in it.
+from .. import distributors
+distributors.update(
+ {
+ 'tme': {
+ 'module': 'tme', # The directory name containing this file.
+ 'scrape': 'web', # Allowable values: 'web' or 'local'.
+ 'label': 'TME', # Distributor label used in spreadsheet columns.
+ 'order_cols': ['part_num', 'purch', 'refs'], # Sort-order for online orders.
+ 'order_delimiter': ' ', # Delimiter for online orders.
+ # Formatting for distributor header in worksheet.
+ 'wrk_hdr_format': {
+ 'font_size': 14,
+ 'font_color': 'white',
+ 'bold': True,
+ 'align': 'center',
+ 'valign': 'vcenter',
+ 'bg_color': '#0C4DA1' # TME blue
+ }
+ }
+ }
+)
diff --git a/kicost/distributors/tme/tme.py b/kicost/distributors/tme/tme.py
new file mode 100644
index 000000000..3e31e8c57
--- /dev/null
+++ b/kicost/distributors/tme/tme.py
@@ -0,0 +1,230 @@
+# MIT license
+#
+# Copyright (C) 2015 by XESS Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+# Inserted by Pasteurize tool.
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from builtins import zip
+from builtins import range
+from builtins import int
+from builtins import str
+from future import standard_library
+standard_library.install_aliases()
+
+import future
+
+import re
+import difflib
+import json
+from bs4 import BeautifulSoup
+import http.client # For web scraping exceptions.
+from .. import urlencode, urlquote, urlsplit, urlunsplit, urlopen, Request
+from .. import HTML_RESPONSE_RETRIES
+from .. import WEB_SCRAPE_EXCEPTIONS
+from .. import FakeBrowser
+from ...kicost import PartHtmlError
+from ...kicost import logger, DEBUG_OVERVIEW, DEBUG_DETAILED, DEBUG_OBSESSIVE
+
+def __ajax_details(pn, dist='tme'):
+    '''Load part details from TME using XMLHttpRequest.
+
+    Args:
+        pn: TME part symbol to query.
+        dist: Distributor name, used only in log messages.
+
+    Returns:
+        Tuple (html_tree, quantity): the parsed 'PriceTpl' markup and the
+        'InStock' value of the first product in the response, or
+        (None, None) if the data can't be fetched or decoded.
+    '''
+    data = urlencode({
+        'symbol': pn,
+        'currency': 'USD'
+    }).encode("utf-8")
+    req = FakeBrowser('http://www.tme.eu/en/_ajax/ProductInformationPage/_getStocks.html')
+    req.add_header('X-Requested-With', 'XMLHttpRequest')
+    for _ in range(HTML_RESPONSE_RETRIES):
+        try:
+            response = urlopen(req, data)
+            r = response.read()
+            break
+        except WEB_SCRAPE_EXCEPTIONS:
+            # `dist` must be defined here: an undefined name would raise
+            # NameError and abort the retry loop.
+            logger.log(DEBUG_DETAILED,'Exception while web-scraping {} from {}'.format(pn, dist))
+    else: # Couldn't get a good read from the website.
+        logger.log(DEBUG_OBSESSIVE,'No AJAX data for {} from {}'.format(pn, dist))
+        return None, None
+
+    try:
+        r = r.decode('utf-8') # Convert bytes to string in Python 3.
+        p = json.loads(r)['Products'][0]
+        html_tree = BeautifulSoup(p['PriceTpl'].replace("\n", ""), "lxml")
+        quantity = p['InStock']
+        return html_tree, quantity
+    except (ValueError, KeyError, IndexError, TypeError, AttributeError):
+        # Malformed JSON, or a response that lacks the expected structure.
+        logger.log(DEBUG_OBSESSIVE, 'Could not obtain AJAX data from TME!')
+        return None, None
+
+def get_price_tiers(html_tree):
+    '''Get the pricing tiers from the parsed tree of the TME product page.
+
+    The prices are not read from html_tree itself: the part number found in
+    the tree is used to request the price table with __ajax_details().
+
+    Args:
+        html_tree: Parsed tree of a TME product page.
+
+    Returns:
+        Dict mapping each price-break quantity (int) to its unit price (float).
+        The dict is empty when no pricing information is found.
+    '''
+    price_tiers = {}
+    try:
+        pn = get_part_num(html_tree)
+        if pn == '':
+            return price_tiers
+
+        ajax_tree, _ = __ajax_details(pn)
+        if ajax_tree is None:
+            return price_tiers
+
+        qty_strs = []
+        price_strs = []
+        for tr in ajax_tree.find('tbody', id='prices_body').find_all('tr'):
+            td = tr.find_all('td')
+            # Only rows with exactly three cells are used: the first cell is
+            # taken as the quantity and the third as the price.
+            if len(td) == 3:
+                qty_strs.append(td[0].text)
+                price_strs.append(td[2].text)
+    except AttributeError:
+        # This happens when no pricing info is found in the tree.
+        logger.log(DEBUG_OBSESSIVE, 'No TME pricing information found!')
+        return price_tiers  # Return empty price tiers.
+
+    for qty_str, price_str in zip(qty_strs, price_strs):
+        try:
+            # Raw strings keep the regex escapes from being treated as
+            # (invalid) string escape sequences.
+            qty = re.search(r'(\s*)([0-9,]+)', qty_str).group(2)
+            qty = int(re.sub(r'[^0-9]', '', qty))
+            price_tiers[qty] = float(re.sub(r'[^0-9\.]', '', price_str))
+        except (TypeError, AttributeError, ValueError, IndexError):
+            # Skip rows whose quantity or price can't be parsed.
+            continue
+    return price_tiers
+
+
+def get_part_num(html_tree):
+    '''Return the TME part number found in the product page tree.
+
+    This is the text of the 'pip-product-symbol' table cell; an empty string
+    is returned if that cell is not in the tree.
+    '''
+    try:
+        symbol_cell = html_tree.find('td', class_="pip-product-symbol")
+        part_num = symbol_cell.text
+    except AttributeError:
+        logger.log(DEBUG_OBSESSIVE, 'No TME part number found!')
+        part_num = ''
+    return part_num
+
+
+def get_qty_avail(html_tree):
+    '''Return the available quantity of the part shown on the TME product page.
+
+    The quantity is requested with __ajax_details() using the part number
+    found in html_tree. None is returned if there is no part number, no
+    quantity, or the quantity is not a valid integer.
+    '''
+    pn = get_part_num(html_tree)
+    if pn != '':
+        _, in_stock = __ajax_details(pn)
+        if in_stock is None:
+            return None
+        try:
+            return int(in_stock)
+        except ValueError:
+            # No quantity found (not even 0) so this is probably a non-stocked part.
+            # Return None so the part won't show in the spreadsheet for this dist.
+            logger.log(DEBUG_OBSESSIVE, 'No TME part quantity found!')
+            return None
+
+    # Without a part number there is nothing to ask the quantity for.
+    logger.log(DEBUG_OBSESSIVE, 'No TME part quantity found!')
+    return None
+
+
+def get_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None):
+    '''Find the TME HTML page for a part number and return the parse tree and URL.
+
+    Args:
+        dist: Distributor name (used in log messages).
+        pn: Part number to search for.
+        extra_search_terms: Extra text appended to the search term.
+        url: Starting URL; if None, a site search URL is built from pn.
+        descend: How many more times a product list may be followed to a linked page.
+        local_part_html: Not used by this function.
+
+    Returns:
+        Tuple (tree, url) for the product page.
+
+    Raises:
+        PartHtmlError: If the page can't be read or parsed, doesn't mention
+            the part number, or no product page can be reached.
+    '''
+
+    # Use the part number to lookup the part using the site search function, unless a starting url was given.
+    if url is None:
+        url = 'http://www.tme.eu/en/katalog/?search=' + urlquote(
+            pn + ' ' + extra_search_terms,
+            safe='')
+    elif url[0] == '/':
+        url = 'http://www.tme.eu' + url
+
+    # Open the URL and read the HTML from it, retrying on scraping errors.
+    req = FakeBrowser(url)
+    for _ in range(HTML_RESPONSE_RETRIES):
+        try:
+            response = urlopen(req)
+            html = response.read()
+            break
+        except WEB_SCRAPE_EXCEPTIONS:
+            logger.log(DEBUG_DETAILED,'Exception while web-scraping {} from {}'.format(pn, dist))
+    else: # Couldn't get a good read from the website.
+        logger.log(DEBUG_OBSESSIVE,'No HTML page for {} from {}'.format(pn, dist))
+        raise PartHtmlError
+
+    # Abort if the part number isn't in the HTML somewhere.
+    # (Only use the numbers and letters to compare PN to HTML.)
+    if re.sub(r'[\W_]','',str.lower(pn)) not in re.sub(r'[\W_]','',str.lower(str(html))):
+        logger.log(DEBUG_OBSESSIVE,'No part number {} in HTML page from {} ({})'.format(pn, dist, url))
+        raise PartHtmlError
+
+    try:
+        tree = BeautifulSoup(html, 'lxml')
+    except Exception:
+        logger.log(DEBUG_OBSESSIVE,'No HTML tree for {} from {}'.format(pn, dist))
+        raise PartHtmlError
+
+    # If the tree contains the tag for a product page, then just return it.
+    if tree.find('div', id='ph') is not None:
+        return tree, url
+
+    # If the tree is for a list of products, then examine the links to try to find the part number.
+    products_table = tree.find('table', id="products")
+    if products_table is not None:
+        logger.log(DEBUG_OBSESSIVE,'Found product table for {} from {}'.format(pn, dist))
+        if descend <= 0:
+            logger.log(DEBUG_OBSESSIVE,'Passed descent limit for {} from {}'.format(pn, dist))
+            raise PartHtmlError
+
+        # Look for the rows in the table of products.
+        products = products_table.find_all('tr', class_='product-row')
+
+        # Extract the product links for the part numbers from the table.
+        # Rows without a manufacturer DIV and anchors without an href are
+        # skipped because they can't be followed.
+        product_links = []
+        for p in products:
+            manufacturer = p.find('div', class_='manufacturer')
+            if manufacturer is None:
+                continue
+            product_links.extend(manufacturer.find_all('a', href=True))
+
+        # Extract all the part numbers from the text portion of the links.
+        part_numbers = [l.text for l in product_links]
+
+        # Look for the part number in the list that most closely matches the requested part number.
+        try:
+            match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]
+        except IndexError:
+            # No candidate part numbers were found in the table.
+            logger.log(DEBUG_OBSESSIVE,'No product links for {} from {}'.format(pn, dist))
+            raise PartHtmlError
+
+        # Now look for the link that goes with the closest matching part number.
+        for l in product_links:
+            if (not l['href'].startswith('./katalog')) and l.text == match:
+                # Get the tree for the linked-to page and return that.
+                logger.log(DEBUG_OBSESSIVE,'Selecting {} from product table for {} from {}'.format(l.text, pn, dist))
+                # TODO: The current implementation does up to four HTTP
+                # requests per part (search, part details page for TME P/N,
+                # XHR for pricing information, and XHR for stock
+                # availability). This is mainly for the compatibility with
+                # other distributor implementations (html_tree gets passed
+                # to all functions).
+                # A modified implementation (which would pass JSON data
+                # obtained by the XHR instead of the HTML DOM tree) might be
+                # able to do the same with just two requests (search for TME
+                # P/N, XHR for pricing and stock availability).
+                return get_part_html_tree(dist, pn, extra_search_terms,
+                                          url=l['href'], descend=descend-1)
+
+    # I don't know what happened here, so give up.
+    logger.log(DEBUG_OBSESSIVE,'Unknown error for {} from {}'.format(pn, dist))
+    raise PartHtmlError
diff --git a/kicost/eda_tools/__init__.py b/kicost/eda_tools/__init__.py
new file mode 100644
index 000000000..7679bf455
--- /dev/null
+++ b/kicost/eda_tools/__init__.py
@@ -0,0 +1,55 @@
+# -*- coding: utf-8 -*-
+# MIT license
+#
+# Copyright (C) 2015 by XESS Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+# Author information.
+__author__ = 'Hildo Guillardi Junior'
+__webpage__ = 'https://github.com/hildogjr/'
+__company__ = 'University of Campinas - Brazil'
+
+import os
+
+
+# Order of the component reference prefixes in the spreadsheet. Use this to
+# group the elements in sequential rows.
+BOM_ORDER = 'u,q,d,t,y,x,c,r,s,j,p,cnn,con'
+
+
+# The EDA tool module directories (e.g. ``altium``) will be found in this
+# directory.
+directory = os.path.dirname(__file__)
+
+# Search for the EDA tool modules and import them. os.listdir() returns its
+# entries in arbitrary order, so sort them to get a reproducible import order.
+for module in sorted(os.listdir(directory)):
+
+    # Avoid importing non-directories.
+    abs_module = os.path.join(directory, module)
+    if not os.path.isdir(abs_module):
+        continue
+
+    # Avoid directories like __pycache__.
+    if module.startswith('__'):
+        continue
+
+    # Import the module relative to this package (level=1).
+    __import__(module, globals(), locals(), [], level=1)
+
+from .subparts import * # Subparts and sub-quantity routines.
diff --git a/kicost/eda_tools/altium/__init__.py b/kicost/eda_tools/altium/__init__.py
new file mode 100644
index 000000000..bcf49d5a8
--- /dev/null
+++ b/kicost/eda_tools/altium/__init__.py
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+# MIT license
+#
+# Copyright (C) 2015 by XESS Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+# Author information.
+__author__ = 'Hildo Guillardi Junior'
+__webpage__ = 'https://github.com/hildogjr/'
+__company__ = 'University of Campinas - Brazil'
+
+from .altium import get_part_groups_altium
diff --git a/kicost/eda_tools/altium/altium.py b/kicost/eda_tools/altium/altium.py
new file mode 100644
index 000000000..9c6a3c778
--- /dev/null
+++ b/kicost/eda_tools/altium/altium.py
@@ -0,0 +1,143 @@
+# Inserted by Pasteurize tool.
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from builtins import zip
+from builtins import range
+from builtins import int
+from builtins import str
+from future import standard_library
+standard_library.install_aliases()
+
+import future
+
+from bs4 import BeautifulSoup
+import logging
+
+logger = logging.getLogger('kicost')
+
+# Debug verbosity levels. Each successive level is one step below
+# logging.DEBUG, so it is only emitted when the logger threshold is
+# lowered correspondingly.
+DEBUG_OVERVIEW = logging.DEBUG
+DEBUG_DETAILED = logging.DEBUG-1
+DEBUG_OBSESSIVE = logging.DEBUG-2
+
+import sys
+
+SEPRTR = ':' # Delimiter between library:component, distributor:field, etc.
+
+# Temporary class for storing part group information.
+class IdenticalComponents(object):
+ '''Bare attribute container for one group of identical parts.
+
+ Instances start out empty; get_part_groups_altium() attaches attributes
+ such as ``refs`` and ``manf_nums`` to them after creation.
+ '''
+ pass
+
+def get_part_groups_altium(in_file, ignore_fields, variant):
+    '''Get groups of identical parts from an Altium XML BOM.
+
+    Every ``row`` element found below the ``rows`` element is one BOM line.
+    Rows whose fields are equal apart from the manufacturer part number are
+    collected into one group. A group that ends up holding several different
+    manufacturer part numbers is then split into one group per number.
+
+    Args:
+        in_file: The XML markup (or an open file with it) given to BeautifulSoup.
+        ignore_fields: Unused here; kept so the signature stays unchanged.
+        variant: Handed to extract_fields(), which does not use it.
+
+    Returns:
+        A tuple ``(groups, prj_info)``. ``groups`` is a list of
+        IdenticalComponents objects carrying the attributes ``fields``,
+        ``refs`` and ``manf_nums``; ``prj_info`` is a dict with placeholder
+        project information.
+    '''
+
+    def extract_fields(part, variant):
+        '''Extract the BOM fields from the attributes of one ``row`` element.'''
+
+        # (field name used by KiCost, attribute name in the XML row)
+        attr_of_field = (
+            ('footprint', 'footprint1'),
+            ('libpart', 'libref1'),
+            ('value', 'value3'),
+            ('reference', 'comment1'),
+            ('manf#', 'manufacturer_part_number_11'),
+        )
+        is_py2 = sys.version_info[0] == 2
+
+        fields = {}
+        for name, attr in attr_of_field:
+            value = part[attr]
+            # Under Python 2 store ASCII byte strings; non-ASCII chars are dropped.
+            fields[name] = value.encode('ascii', 'ignore') if is_py2 else value
+        return fields
+
+    # Read-in the schematic XML file to get a tree and get its root.
+    logger.log(DEBUG_OVERVIEW, 'Get schematic XML...')
+    root = BeautifulSoup(in_file, 'lxml')
+
+    logger.log(DEBUG_OVERVIEW, 'Get parts library...')
+    components = {}        # Designator -> fields of the row it was listed in.
+    component_groups = {}  # Grouping key -> IdenticalComponents.
+
+    for p in root.find('rows').find_all('row'):
+
+        fields = extract_fields(p, variant)
+
+        # One row lists all its designators separated by commas.
+        refs = p['designator1'].replace(' ', '').split(',')
+        for ref in refs:
+            components[ref] = fields
+
+        # Rows are grouped on every field except the manufacturer part number,
+        # so rows that only differ in manf# land in the same group and can be
+        # split afterwards. The tuple itself is the key, which avoids merging
+        # unrelated rows on an accidental hash() collision.
+        hash_fields = {k: v for k, v in fields.items()
+                       if k not in ('manf#', 'manf') and SEPRTR not in k}
+        key = tuple(sorted(hash_fields.items()))
+
+        grp = component_groups.get(key)
+        if grp is None:
+            grp = IdenticalComponents()
+            grp.fields = fields
+            grp.refs = []
+            grp.manf_nums = set()
+            component_groups[key] = grp
+
+        # Extend (never overwrite) an existing group, otherwise the
+        # designators of the earlier rows would be lost.
+        grp.refs.extend(refs)
+        # Add the manf. part num (or None) for this row to the group set.
+        grp.manf_nums.add(fields.get('manf#'))
+
+    # Now we have groups of seemingly identical parts. But some of the parts
+    # within a group may have different manufacturer's part numbers:
+    #   One manf#: Don't split this group.
+    #   Two manf#, but one is None: Don't split this group.
+    #   Otherwise: Split the group into smaller groups, each one with parts
+    #     having the same manf#, even if it's None.
+    new_component_groups = []
+    for grp in component_groups.values():
+        num_manf_nums = len(grp.manf_nums)
+        if num_manf_nums == 1 or (num_manf_nums == 2 and None in grp.manf_nums):
+            new_component_groups.append(grp)
+            continue
+
+        # Sort for a reproducible output order (None, if present, goes last).
+        ordered_manf_nums = sorted(
+            grp.manf_nums,
+            key=lambda m: (m is None, m if m is not None else ''))
+        for manf_num in ordered_manf_nums:
+            sub_refs = [ref for ref in grp.refs
+                        if components[ref].get('manf#') == manf_num]
+            if not sub_refs:
+                # Can happen when a designator is repeated in several rows.
+                continue
+            sub_group = IdenticalComponents()
+            sub_group.manf_nums = [manf_num]
+            sub_group.refs = sub_refs
+            # All members share the same fields, including this manf#.
+            sub_group.fields = components[sub_refs[0]]
+            new_component_groups.append(sub_group)
+
+    prj_info = {'title':'test_title','company':'test_company'} # Not implemented yet.
+
+    # Now return the list of identical part groups.
+    return new_component_groups, prj_info
+
+if __name__=='__main__':
+
+    # Ad-hoc manual check: parse a sample BOM file from the current directory.
+    #~ file_handle=open('wiSensAFE.xml')
+    # The context manager closes the file even if parsing fails.
+    with open('meacs.xml') as file_handle:
+        get_part_groups_altium(file_handle, [], '')
diff --git a/kicost/eda_tools/subparts.py b/kicost/eda_tools/subparts.py
new file mode 100644
index 000000000..3171153d0
--- /dev/null
+++ b/kicost/eda_tools/subparts.py
@@ -0,0 +1,203 @@
+# MIT license
+#
+# Copyright (C) 2015 by XESS Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+# Libraries.
+import re # Regular expression parser.
+#from ..kicost import distributors
+from ..kicost import logger, DEBUG_OVERVIEW, DEBUG_DETAILED, DEBUG_OBSESSIVE
+distributors = ['rs','digikey','mouser','newark','farnell']
+
+# Author information.
+__author__ = 'Hildo Guillardi Junior'
+__webpage__ = 'https://github.com/hildogjr/'
+__company__ = 'University of Campinas - Brazil'
+
+__all__ = ['subpart_split','subpart_qty']
+
+# The separators below are regular expression fragments, so they are written
+# as raw strings (a plain '\:' is an invalid string escape sequence).
+QTY_SSTR = r'[\:]'  # Separator between the subpart quantity and the
+                    # manufacturer/distributor code.
+PART_SSTR = r'[\;\,]'  # Separator between the parts (manufacturer/
+                       # distributor codes) in the list.
+SUB_SSTR = ''#'.'  # String that separates the reference from the subpart
+                   # index in the newly created references.
+                   # NOTE(review): with an empty separator, subpart 1 of U1
+                   # is named 'U11', which could clash with a real U11 --
+                   # confirm this is intended.
+
+# Definitions to parse the manufacturer / distributor code to allow
+# subparts and different quantities (even fractions) in these.
+
+
+
+# components = subpart_split(accepted_components)
+
+# ------------------ Public functions
+
+def subpart_split(components):
+    '''Split components listing several part codes into one component per subpart.
+
+    The fields ``manf#`` and ``<distributor>#`` may hold a list of codes (see
+    subpart_list()). A component with more than one code is removed from
+    *components* and replaced by one copy per subpart, stored under the
+    reference ``<ref><SUB_SSTR><index>``. Each copy gets:
+
+    * ``value`` rewritten to ``'<value> - p<index>/<total>'``;
+    * every code field reduced to the code at the same list position, plus a
+      ``<field>_subqty`` entry with its quantity (see subpart_qtypart());
+    * no entry at all for a code field whose list is shorter than the index.
+
+    e.g. U1:{'manf#':'PARTG1;PARTG2;PARTG3', 'mouser#':'PARTM1;PARTM2'}
+    result:
+      U1<SUB_SSTR>1:{'manf#':'PARTG1', 'mouser#':'PARTM1'}
+      U1<SUB_SSTR>2:{'manf#':'PARTG2', 'mouser#':'PARTM2'}
+      U1<SUB_SSTR>3:{'manf#':'PARTG3'}
+
+    Args:
+        components: Dict mapping a reference to its dict of fields. It is
+            modified in place.
+
+    Returns:
+        The same *components* dict.
+    '''
+    logger.log(DEBUG_OVERVIEW, 'Search for subpart in the designed parts...')
+    code_fields = [d + '#' for d in distributors]
+    code_fields.append('manf#')
+
+    # Iterate over a snapshot of the keys: the dict is modified in the loop.
+    for designator in list(components.keys()):
+        part = components[designator]
+
+        # Find the manufacturer/distributor code fields in use and split each
+        # of them into its list of subpart codes.
+        founded_fields = []
+        subparts_manf = {}
+        for field_code in code_fields:
+            if field_code in part:
+                founded_fields.append(field_code)
+                subparts_manf[field_code] = subpart_list(part[field_code])
+        if not founded_fields:
+            continue  # No manf/distributor code: pass to the next component.
+        if logger.isEnabledFor(DEBUG_DETAILED):
+            print(designator, '>>', founded_fields)
+
+        # The number of subparts is given by the longest code list.
+        subparts_qty = max(len(subparts_manf[f]) for f in founded_fields)
+        if subparts_qty <= 1:
+            continue
+        if 'value' not in part:
+            # Checked before the component is removed, so a component without
+            # a value is left untouched instead of vanishing from the dict.
+            continue
+
+        # Remove the actual part from the list and add the split subparts.
+        part_actual = components.pop(designator)
+        part_actual_value = part_actual['value']
+        for subparts_index in range(subparts_qty):
+            # Start every subpart from a fresh copy; sharing one dict would
+            # leak the codes of the previous subpart into this one.
+            subpart_actual = part_actual.copy()
+            subpart_actual['value'] = '{v} - p{idx}/{total}'.format(
+                v=part_actual_value,
+                idx=subparts_index+1,
+                total=subparts_qty)
+            for field_manf in founded_fields:
+                try:
+                    p_manf = subparts_manf[field_manf][subparts_index]
+                except IndexError:
+                    # This field lists fewer codes than there are subparts,
+                    # so this subpart has no code for it.
+                    del subpart_actual[field_manf]
+                    continue
+                sub_qty, sub_part = subpart_qtypart(p_manf)
+                subpart_actual[field_manf] = sub_part
+                subpart_actual[field_manf+'_subqty'] = sub_qty
+            if logger.isEnabledFor(DEBUG_OBSESSIVE):
+                print(subpart_actual)
+            ref = designator + SUB_SSTR + str(subparts_index + 1)
+            components[ref] = subpart_actual
+    return components
+
+
+def subpart_qty(component):
+    '''Return the spreadsheet formula template for the quantity of a part group.
+
+    The returned string contains one ``{}`` placeholder that the caller fills
+    in with str.format() (e.g. with the name of the board quantity cell). The
+    number of references of the group is multiplied by the ``manf#_subqty``
+    field when that field is present and different from ``'1'``; in that case
+    the result is rounded up with CEILING because the sub quantity may be a
+    fraction.
+
+    Args:
+        component: Object with the attributes ``refs`` (sized collection of
+            references) and ``fields`` (dict of fields).
+
+    Returns:
+        The formula template as a string.
+    '''
+    try:
+        if logger.isEnabledFor(DEBUG_OBSESSIVE):
+            print('Qty>>',component.refs,'>>',
+                  component.fields.get('manf#_subqty'), '*',
+                  component.fields.get('manf#'))
+        subqty = component.fields.get('manf#_subqty')
+        if subqty != '1' and subqty is not None:
+            string = '=CEILING({{}}*({subqty})*{qty},1)'.format(
+                subqty=subqty,
+                qty=len(component.refs))
+        else:
+            string = '={{}}*{qty}'.format(qty=len(component.refs))
+    except (KeyError, TypeError):
+        # Fall back to the plain quantity of references.
+        if logger.isEnabledFor(DEBUG_OBSESSIVE):
+            print('Qty>>',component.refs,'>>',len(component.refs))
+        string = '={{}}*{qty}'.format(qty=len(component.refs))
+    return string
+
+
+
+# ------------------ Private functions
+
+def subpart_list(part):
+    '''Split a manufacturer/distributor code field into its subpart codes.
+
+    The codes have to be separated by PART_SSTR. Surrounding spaces are
+    stripped; the quantity information of each subpart is kept in its string.
+    '''
+    # NOTE(review): the pattern was garbled in the reviewed copy of this file;
+    # only a leading '(?' survived. It is reconstructed here as a negative
+    # look-behind that keeps a backslash-escaped separator from splitting --
+    # confirm against the upstream source.
+    return re.split(r'(?<!\\)\s*' + PART_SSTR + r'\s*', part.strip())
+
+
+def subpart_qtypart(subpart):
+    '''Split one subpart string into its quantity and its part code.
+
+    The quantity may be written before or after the code, separated by
+    QTY_SSTR:
+      '4.5 : ADUM3150BRSZ-RL7' -> ('4.5', 'ADUM3150BRSZ-RL7')
+      '4/5 : ADUM3150BRSZ-RL7' -> ('4/5', 'ADUM3150BRSZ-RL7')
+      '7:ADUM3150BRSZ-RL7'     -> ('7', 'ADUM3150BRSZ-RL7')
+      'ADUM3150BRSZ-RL7 : 7'   -> ('7', 'ADUM3150BRSZ-RL7')
+      'ADUM3150BRSZ-RL7'       -> ('1', 'ADUM3150BRSZ-RL7')
+      'ADUM3150BRSZ-RL7:'      -> ('1', 'ADUM3150BRSZ-RL7')  forgotten qty means '1'
+
+    Returns:
+        A tuple ``(qty, part)`` of strings.
+    '''
+    strings = re.split(r'\s*' + QTY_SSTR + r'\s*', subpart)
+    if len(strings)==2:
+        # Search for numbers, matching with simple, frac and decimal ones.
+        # The pattern also matches an empty string (forgotten quantity).
+        num_format = re.compile(r"^\s*[\-\+]?\s*[0-9]*\s*[\.\/]*\s*?[0-9]*\s*$")
+        string0_test = re.match(num_format, strings[0])
+        string1_test = re.match(num_format, strings[1])
+        if string0_test and not(string1_test):
+            qty = strings[0].strip()
+            part = strings[1].strip()
+        elif not(string0_test) and string1_test:
+            qty = strings[1].strip()
+            part = strings[0].strip()
+        elif string0_test and string1_test:
+            # A purely numeric manufacturer/distributor part may have been
+            # found. In this case the quantity is the shortest string, not
+            # counting the "." and "/" marks.
+            len0 = len(re.sub(r'[\.\/]', '', strings[0]))
+            len1 = len(re.sub(r'[\.\/]', '', strings[1]))
+            if len0 < len1:
+                qty = strings[0].strip()
+                part = strings[1].strip()
+            else:
+                qty = strings[1].strip()
+                part = strings[0].strip()
+        else:
+            # Neither side is a number: no quantity was given.
+            qty = '1'
+            part = strings[0].strip() + strings[1].strip()
+        if qty=='':
+            qty = '1'
+    else:
+        qty = '1'
+        part = ''.join(strings)
+    if logger.isEnabledFor(DEBUG_OBSESSIVE):
+        print('part/qty>>', subpart, '\t\tpart>>', part, '\tqty>>', qty)
+    return qty, part
diff --git a/kicost/kicost.py b/kicost/kicost.py
index d78819628..15ecbe7b1 100644
--- a/kicost/kicost.py
+++ b/kicost/kicost.py
@@ -36,21 +36,25 @@
import sys
import pprint
+import copy
import re # Regular expression parser.
import difflib
import logging
import tqdm
import os
from bs4 import BeautifulSoup # XML file interpreter.
-from random import randint
import xlsxwriter # XLSX file interpreter.
from xlsxwriter.utility import xl_rowcol_to_cell, xl_range, xl_range_abs
from yattag import Doc, indent # For generating HTML page for local parts.
import multiprocessing
from multiprocessing import Pool # For running web scrapes in parallel.
-import http.client # For web scraping exceptions.
from datetime import datetime
+try:
+    from urllib.parse import urlsplit, urlunsplit
+except ImportError:
+    # Python 2: both helpers live in the urlparse module.
+    from urlparse import urlsplit, urlunsplit
+
# Stops UnicodeDecodeError exceptions.
try:
reload(sys)
@@ -58,25 +62,10 @@
except NameError:
pass # Happens if reload is attempted in Python 3.
-def FakeBrowser(url):
- req = Request(url)
- req.add_header('Accept-Language', 'en-US')
- req.add_header('User-agent', get_user_agent())
- return req
-
class PartHtmlError(Exception):
'''Exception for failed retrieval of an HTML parse tree for a part.'''
pass
-try:
- from urllib.parse import urlencode, quote as urlquote, urlsplit, urlunsplit
- import urllib.request
- from urllib.request import urlopen, Request
-except ImportError:
- from urlparse import quote as urlquote, urlsplit, urlunsplit
- from urllib import urlencode
- from urllib2 import urlopen, Request
-
# ghost library allows scraping pages that have Javascript challenge pages that
# screen-out robots. Digi-Key stopped doing this, so it's not needed at the moment.
# Also requires installation of Qt4.8 (not 5!) and pyside.
@@ -84,36 +73,20 @@ class PartHtmlError(Exception):
__all__ = ['kicost'] # Only export this routine for use by the outside world.
-# Used to get the names of functions in this module so they can be called dynamically.
-THIS_MODULE = locals()
-
-ALL_MODULES = globals()
-
SEPRTR = ':' # Delimiter between library:component, distributor:field, etc.
-HTML_RESPONSE_RETRIES = 2 # Num of retries for getting part data web page.
-
-WEB_SCRAPE_EXCEPTIONS = (urllib.request.URLError, http.client.HTTPException)
-# Global array of distributor names.
-distributors = {}
-
logger = logging.getLogger('kicost')
-
-
DEBUG_OVERVIEW = logging.DEBUG
DEBUG_DETAILED = logging.DEBUG-1
DEBUG_OBSESSIVE = logging.DEBUG-2
+# Import other EDA importer routines.
# Altium requires a different part grouping function than KiCad.
-from .altium.altium import get_part_groups_altium
+from .eda_tools import *
-# Import web scraping functions for various distributor websites.
-from .local import *
-from .digikey import *
-from .newark import *
-from .mouser import *
-from .rs import *
-from .farnell import *
+# Import information about various distributors.
+from . import distributors as distributor_imports
+distributors = distributor_imports.distributors
# Generate a dictionary to translate all the different ways people might want
# to refer to part numbers, vendor numbers, and such.
@@ -177,6 +150,7 @@ def kicost(in_file, out_filename, user_fields, ignore_fields, variant, num_proce
include_dist_list = list(distributors.keys())
rmv_dist = set(exclude_dist_list)
rmv_dist |= set(list(distributors.keys())) - set(include_dist_list)
+ rmv_dist -= set(['local_template']) # We need this later for creating non-web distributors.
for dist in rmv_dist:
distributors.pop(dist, None)
@@ -414,7 +388,7 @@ def extract_fields(part, variant):
#print('Removed parts:', set(components.keys())-set(accepted_components.keys()))
# Replace the component list with the list of accepted parts.
- components = accepted_components
+ components = subpart_split(accepted_components)
# Now partition the parts into groups of like components.
# First, get groups of identical components but ignore any manufacturer's
@@ -501,6 +475,51 @@ def extract_fields(part, variant):
grp_fields[key] = val
grp.fields = grp_fields
+ # Put the component groups in the spreadsheet rows in a specific order
+ # using the reference string of the components. The order is defined
+ # by BOM_ORDER.
+ ref_identifiers = re.split('(?0:
+ # If more than one group is found with the reference, use the 'manf#'
+ # as the second ordering criterion.
+ if len(component_groups_ref_match)>1:
+ try:
+ for item in component_groups_ref_match:
+ component_groups_order_old.remove(item)
+ except ValueError:
+ pass
+ # Examine 'manf#' to get the order.
+ group_manf_list = [new_component_groups[h].fields.get('manf#') for h in component_groups_ref_match]
+ if group_manf_list:
+ m=group_manf_list
+ sorted_groups = sorted(range(len(group_manf_list)), key=lambda k:(group_manf_list[k] is None, group_manf_list[k]))
+# [i[0] for i in sorted(enumerate(group_manf_list), key=lambda x:x[1])]
+ if logger.isEnabledFor(DEBUG_OBSESSIVE):
+ print(group_manf_list,' > order: ', sorted_groups)
+ component_groups_ref_match = [component_groups_ref_match[i] for i in sorted_groups]
+ component_groups_order_new += component_groups_ref_match
+ else:
+ try:
+ component_groups_order_old.remove(component_groups_ref_match[0])
+ except ValueError:
+ pass
+ component_groups_order_new += component_groups_ref_match
+ # The new order is the found refs first and, at the end, the ones not referenced in BOM_ORDER.
+ component_groups_order_new += component_groups_order_old # Add the missing references groups.
+ new_component_groups = [new_component_groups[i] for i in component_groups_order_new]
+
# Now return the list of identical part groups.
return new_component_groups, prj_info
@@ -526,14 +545,14 @@ def create_local_part_html(parts):
dist = key[:key.index(SEPRTR)]
except ValueError:
continue
+
+ # If the distributor is not in the list of web-scrapable distributors,
+ # then it's a local distributor. Copy the local distributor template
+ # and add it to the table of distributors.
if dist not in distributors:
- distributors[dist] = {
- 'scrape': 'local',
- 'function': 'local',
- 'label': dist,
- 'order_cols': ['purch', 'part_num', 'refs'],
- 'order_delimiter': ''
- }
+ distributors[dist] = copy.copy(distributors['local_template'])
+ distributors[dist]['label'] = dist # Set dist name for spreadsheet header.
+
# Now look for catalog number, price list and webpage link for this part.
for dist in distributors:
cat_num = p.fields.get(dist+':cat#')
@@ -562,6 +581,11 @@ def make_random_catalog_number(p):
link = urlunsplit(url_parts)
with tag('div', klass='link'):
text(link)
+
+ # Remove the local distributor template so it won't be processed later on.
+ # It has served its purpose.
+ del distributors['local_template']
+
html = doc.getvalue()
if logger.isEnabledFor(DEBUG_OBSESSIVE):
print(indent(html))
@@ -596,64 +620,6 @@ def create_spreadsheet(parts, prj_info, spreadsheet_filename, user_fields, varia
'valign': 'vcenter',
'bg_color': '#303030'
}),
- 'digikey': workbook.add_format({
- 'font_size': 14,
- 'font_color': 'white',
- 'bold': True,
- 'align': 'center',
- 'valign': 'vcenter',
- 'bg_color': '#CC0000' # Digi-Key red.
- }),
- 'mouser': workbook.add_format({
- 'font_size': 14,
- 'font_color': 'white',
- 'bold': True,
- 'align': 'center',
- 'valign': 'vcenter',
- 'bg_color': '#004A85' # Mouser blue.
- }),
- 'newark': workbook.add_format({
- 'font_size': 14,
- 'font_color': 'white',
- 'bold': True,
- 'align': 'center',
- 'valign': 'vcenter',
- 'bg_color': '#A2AE06' # Newark/E14 olive green.
- }),
- 'rs': workbook.add_format({
- 'font_size': 14,
- 'font_color': 'white',
- 'bold': True,
- 'align': 'center',
- 'valign': 'vcenter',
- 'bg_color': '#FF0000' # RS Components red.
- }),
- 'farnell': workbook.add_format({
- 'font_size': 14,
- 'font_color': 'white',
- 'bold': True,
- 'align': 'center',
- 'valign': 'vcenter',
- 'bg_color': '#FF6600' # Farnell/E14 orange.
- }),
- 'local_lbl': [
- workbook.add_format({
- 'font_size': 14,
- 'font_color': 'black',
- 'bold': True,
- 'align': 'center',
- 'valign': 'vcenter',
- 'bg_color': '#909090' # Darker grey.
- }),
- workbook.add_format({
- 'font_size': 14,
- 'font_color': 'black',
- 'bold': True,
- 'align': 'center',
- 'valign': 'vcenter',
- 'bg_color': '#c0c0c0' # Lighter grey.
- }),
- ],
'header': workbook.add_format({
'font_size': 12,
'bold': True,
@@ -692,8 +658,10 @@ def create_spreadsheet(parts, prj_info, spreadsheet_filename, user_fields, varia
'num_format': '$#,##0.00',
'valign': 'vcenter'
}),
- 'founded_perc': workbook.add_format({
+ 'found_part_pct': workbook.add_format({
'font_size': 12,
+ 'bold': True,
+ 'italic': True,
'valign': 'vcenter'
}),
'proj_info_field': workbook.add_format({
@@ -708,15 +676,19 @@ def create_spreadsheet(parts, prj_info, spreadsheet_filename, user_fields, varia
'valign': 'vcenter'
}),
'best_price': workbook.add_format({'bg_color': '#80FF80', }),
- 'insufficient_qty': workbook.add_format({'bg_color': '#FF0000', 'font_color':'white'}),
+ 'not_available': workbook.add_format({'bg_color': '#FF0000', 'font_color':'white'}),
+ 'order_too_much': workbook.add_format({'bg_color': '#FF0000', 'font_color':'white'}),
+ 'too_few_available': workbook.add_format({'bg_color': '#FF9900', 'font_color':'black'}),
+ 'too_few_purchased': workbook.add_format({'bg_color': '#FFFF00'}),
'not_stocked': workbook.add_format({'font_color': '#909090', 'align': 'right' }),
- 'not_purchased' : workbook.add_format({'bg_color': '#FFFF00'}),
- 'not_founded' : workbook.add_format({'bg_color': '#FF0000'}),
- 'not_enough' : workbook.add_format({'bg_color': '#FFFF00'}),
'currency': workbook.add_format({'num_format': '$#,##0.00'}),
'centered_text': workbook.add_format({'align': 'center'}),
}
+ # Add the distinctive header format for each distributor to the dict of formats.
+ for d in distributors:
+ wrk_formats[d] = workbook.add_format(distributors[d]['wrk_hdr_format'])
+
# Create the worksheet that holds the pricing information.
wks = workbook.add_worksheet(WORKSHEET_NAME)
@@ -793,13 +765,11 @@ def create_spreadsheet(parts, prj_info, spreadsheet_filename, user_fields, varia
dist_list = web_dists + local_dists
# Load the part information from each distributor into the sheet.
- index = 0
for dist in dist_list:
dist_start_col = next_col
- next_col = add_dist_to_worksheet(wks, wrk_formats, index, START_ROW,
+ next_col = add_dist_to_worksheet(wks, wrk_formats, START_ROW,
dist_start_col, UNIT_COST_ROW, TOTAL_COST_ROW,
refs_col, qty_col, dist, parts)
- index = (index+1) % 2
# Create a defined range for each set of distributor part data.
workbook.define_name(
'{}_part_data'.format(dist), '={wks_name}!{data_range}'.format(
@@ -931,7 +901,7 @@ def add_globals_to_worksheet(wks, wrk_formats, start_row, start_col,
'level': 0,
'label': 'Manf#',
'width': None,
- 'comment': 'Manufacturer number for each part.\nRed -> Not founded parts\nYellow -> Not enough aval.',
+ 'comment': 'Manufacturer number for each part.',
'static': True,
},
'qty': {
@@ -939,7 +909,10 @@ def add_globals_to_worksheet(wks, wrk_formats, start_row, start_col,
'level': 0,
'label': 'Qty',
'width': None,
- 'comment': 'Total number of each part needed to assemble the board.\nYellow -> Not purchased part enough.',
+ 'comment': '''Total number of each part needed to assemble the board.
+Red -> No parts available.
+Orange -> Parts available, but not enough.
+Yellow -> Enough parts available, but haven't purchased enough.''',
'static': False,
},
'unit_price': {
@@ -947,8 +920,7 @@ def add_globals_to_worksheet(wks, wrk_formats, start_row, start_col,
'level': 0,
'label': 'Unit$',
'width': None,
- 'comment':
- 'Minimum unit price for each part across all distributors.',
+ 'comment': 'Minimum unit price for each part across all distributors.',
'static': False,
},
'ext_price': {
@@ -956,8 +928,7 @@ def add_globals_to_worksheet(wks, wrk_formats, start_row, start_col,
'level': 0,
'label': 'Ext$',
'width': 15, # Displays up to $9,999,999.99 without "###".
- 'comment':
- 'Minimum extended price for each part across all distributors.',
+ 'comment': 'Minimum extended price for each part across all distributors.',
'static': False,
},
}
@@ -980,7 +951,7 @@ def add_globals_to_worksheet(wks, wrk_formats, start_row, start_col,
'level': 0,
'label': user_field,
'width': None,
- 'comment': 'User-defined field',
+ 'comment': 'User-defined field.',
'static': True,
}
@@ -1027,73 +998,86 @@ def add_globals_to_worksheet(wks, wrk_formats, start_row, start_col,
# Enter total part quantity needed.
try:
+ part_qty = subpart_qty(part);
wks.write(row, start_col + columns['qty']['col'],
- '=BoardQty*{}'.format(len(part.refs)))
+ part_qty.format('BoardQty') )
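+ # Replaces the old fixed formula '=BoardQty*{}'.format(len(part.refs)); part_qty is presumably a formula template from subpart_qty() whose placeholder takes the board-quantity name -- TODO confirm.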
except KeyError:
pass
-
- # Enter spreadsheet formula for getting the minimum unit price from all the distributors.
+ # Gather the cell references for calculating minimum unit price and part availability.
dist_unit_prices = []
- dist_purchased_qty = []
- qty_not_enough = []
- part_not_founded = []
+ dist_qty_avail = []
+ dist_qty_purchased = []
for dist in list(distributors.keys()):
+
# Get the name of the data range for this distributor.
- dist_part_data_range = '{}_part_data'.format(dist)
+ dist_data_rng = '{}_part_data'.format(dist)
+
# Get the contents of the unit price cell for this part (row) and distributor (column+offset).
dist_unit_prices.append(
- 'INDIRECT(ADDRESS(ROW(),COLUMN({})+2))'.format(
- dist_part_data_range))
- # Get the purchased quantity cell reference.
- dist_purchased_qty.append(
- 'IF(ISNUMBER(INDIRECT(ADDRESS(ROW(),COLUMN({dist_part})+2))),INDIRECT(ADDRESS(ROW(),COLUMN({dist_part})+1)),0)'.format(dist_part=dist_part_data_range))
- # Get the contents of the unit price cell for this part (row) and distributor (column+offset).
- qty_not_enough.append(
- 'INDIRECT(ADDRESS(ROW(),COLUMN({})))'.format(
- dist_part_data_range))
- # Get the contents of the unit price cell for this part (row) and distributor (column+offset).
- part_not_founded.append(
- 'NOT(ISNUMBER(INDIRECT(ADDRESS(ROW(),COLUMN({})+2))))'.format(
- dist_part_data_range))
- # Create the function that finds the minimum of all the distributor unit price cells for this part.
- wks.write(row, start_col + columns['unit_price']['col'],
- '=MINA({})'.format(','.join(dist_unit_prices)),
- wrk_formats['currency'])
- # Create a function that warnning the user if he do not purche the necessary quantity.
- wks.conditional_format(row, start_col + columns['qty']['col'],
- row, start_col + columns['qty']['col'], {
- 'type': 'cell',
- 'criteria': '>',
- 'value': '=SUM({})'.format(','.join(dist_purchased_qty)),
- 'format': wrk_formats['not_purchased']
- })
- # Create a function that error if not found part in any distributor.
- # Add first to be prioritary to the next one.
- wks.conditional_format(row, start_col + columns['manf#']['col'],
- row, start_col + columns['manf#']['col'], {
- 'type': 'formula',
- 'criteria': '=AND({})'.format(','.join(part_not_founded)),
- 'format': wrk_formats['not_founded']
- })
- # Create a function that warnning if not avaliable the necessary quantity.
- wks.conditional_format(row, start_col + columns['manf#']['col'],
- row, start_col + columns['manf#']['col'], {
- 'type': 'formula',
- 'criteria': '=SUM({formula})<{qty_needed}'.format(
- formula=','.join(qty_not_enough),
- qty_needed=xl_rowcol_to_cell(row, start_col + columns['qty']['col'])),
- 'format': wrk_formats['not_enough']
- })
+ 'INDIRECT(ADDRESS(ROW(),COLUMN({})+2))'.format(dist_data_rng))
+
+ # Get the contents of the quantity purchased cell for this part and distributor
+ # unless the unit price is not a number in which case return 0.
+ dist_qty_purchased.append(
+ 'IF(ISNUMBER(INDIRECT(ADDRESS(ROW(),COLUMN({0})+2))),INDIRECT(ADDRESS(ROW(),COLUMN({0})+1)),0)'.format(dist_data_rng))
+
+ # Get the contents of the quantity available cell of this part from this distributor.
+ dist_qty_avail.append(
+ 'INDIRECT(ADDRESS(ROW(),COLUMN({})+0))'.format(dist_data_rng))
- # Enter spreadsheet formula for calculating minimum extended price.
+ # Enter the spreadsheet formula to find this part's minimum unit price across all distributors.
+ wks.write_formula(
+ row, start_col + columns['unit_price']['col'],
+ '=MINA({})'.format(','.join(dist_unit_prices)),
+ wrk_formats['currency']
+ )
+
+ # Enter the spreadsheet formula for calculating the minimum extended price.
wks.write_formula(
row, start_col + columns['ext_price']['col'],
'=iferror({qty}*{unit_price},"")'.format(
- qty=xl_rowcol_to_cell(row, start_col + columns['qty']['col']),
- unit_price=xl_rowcol_to_cell(row, start_col +
- columns['unit_price']['col'])),
- wrk_formats['currency'])
+ qty = xl_rowcol_to_cell(row, start_col + columns['qty']['col']),
+ unit_price = xl_rowcol_to_cell(row, start_col + columns['unit_price']['col'])
+ ),
+ wrk_formats['currency']
+ )
+
+ # If part is unavailable from all distributors, color quantity cell red.
+ wks.conditional_format(
+ row, start_col + columns['qty']['col'],
+ row, start_col + columns['qty']['col'],
+ {
+ 'type': 'formula',
+ 'criteria': '=IF(SUM({})=0,1,0)'.format(','.join(dist_qty_avail)),
+ 'format': wrk_formats['not_available']
+ }
+ )
+
+ # If total available part quantity is less than needed quantity, color cell orange.
+ wks.conditional_format(
+ row, start_col + columns['qty']['col'],
+ row, start_col + columns['qty']['col'],
+ {
+ 'type': 'cell',
+ 'criteria': '>',
+ 'value': '=SUM({})'.format(','.join(dist_qty_avail)),
+ 'format': wrk_formats['too_few_available']
+ }
+ )
+
+ # If total purchased part quantity is less than needed quantity, color cell yellow.
+ wks.conditional_format(
+ row, start_col + columns['qty']['col'],
+ row, start_col + columns['qty']['col'],
+ {
+ 'type': 'cell',
+ 'criteria': '>',
+ 'value': '=SUM({})'.format(','.join(dist_qty_purchased)),
+ 'format': wrk_formats['too_few_purchased'],
+ }
+ )
# Enter part shortage quantity.
try:
@@ -1116,7 +1100,7 @@ def add_globals_to_worksheet(wks, wrk_formats, start_row, start_col,
return start_col + num_cols, start_col + columns['refs']['col'], start_col + columns['qty']['col']
-def add_dist_to_worksheet(wks, wrk_formats, index, start_row, start_col,
+def add_dist_to_worksheet(wks, wrk_formats, start_row, start_col,
unit_cost_row, total_cost_row, part_ref_col, part_qty_col,
dist, parts):
'''Add distributor-specific part data to the spreadsheet.'''
@@ -1129,15 +1113,16 @@ def add_dist_to_worksheet(wks, wrk_formats, index, start_row, start_col,
'level': 1, # Outline level (or hierarchy level) for this column.
'label': 'Avail', # Column header label.
'width': None, # Column width (default in this case).
- 'comment': 'Available quantity of each part at the distributor.\nRed -> necessary quantity is not available.'
- # Column header tool-tip.
+ 'comment': '''Available quantity of each part at the distributor.
+Red -> No quantity available.
+Orange -> Too little quantity available.'''
},
'purch': {
'col': 1,
'level': 2,
'label': 'Purch',
'width': None,
- 'comment': 'Purchase quantity of each part from this distributor.'
+ 'comment': 'Purchase quantity of each part from this distributor.\nRed -> Purchasing more than the available quantity.'
},
'unit_price': {
'col': 2,
@@ -1152,7 +1137,7 @@ def add_dist_to_worksheet(wks, wrk_formats, index, start_row, start_col,
'label': 'Ext$',
'width': 15, # Displays up to $9,999,999.99 without "###".
'comment':
- '(Unit Price) x (Purchase Qty) of each part from this distributor.\nRed -> next price break is cheaper.\nGreen -> cheapest supplier.'
+ '(Unit Price) x (Purchase Qty) of each part from this distributor.\nRed -> Next price break is cheaper.\nGreen -> Cheapest supplier.'
},
'part_num': {
'col': 4,
@@ -1167,12 +1152,8 @@ def add_dist_to_worksheet(wks, wrk_formats, index, start_row, start_col,
row = start_row # Start building distributor section at this row.
# Add label for this distributor.
- try:
- wks.merge_range(row, start_col, row, start_col + num_cols - 1,
+ wks.merge_range(row, start_col, row, start_col + num_cols - 1,
distributors[dist]['label'].title(), wrk_formats[dist])
- except KeyError:
- wks.merge_range(row, start_col, row, start_col + num_cols - 1,
- distributors[dist]['label'].title(), wrk_formats['local_lbl'][index])
row += 1 # Go to next row.
# Add column headers, comments, and outline level (for hierarchy).
@@ -1252,6 +1233,7 @@ def add_dist_to_worksheet(wks, wrk_formats, index, start_row, start_col,
# Sort the tiers based on quantities and turn them into lists of strings.
qtys = sorted(price_tiers.keys())
+ avail_qty_col = start_col + columns['avail']['col']
purch_qty_col = start_col + columns['purch']['col']
unit_price_col = start_col + columns['unit_price']['col']
ext_price_col = start_col + columns['ext_price']['col']
@@ -1272,20 +1254,45 @@ def add_dist_to_worksheet(wks, wrk_formats, index, start_row, start_col,
for q in qtys[1:]: # Skip the first qty which is always 0.
price_break_info += '\n{:>6d} {:>7s} {:>10s}'.format(
q,
- '${:.3f}'.format(price_tiers[q]),
+ '${:.2f}'.format(price_tiers[q]),
'${:.2f}'.format(price_tiers[q] * q))
wks.write_comment(row, unit_price_col, price_break_info)
+ # Conditional format to show no quantity is available.
+ wks.conditional_format(
+ row, start_col + columns['avail']['col'],
+ row, start_col + columns['avail']['col'],
+ {
+ 'type': 'cell',
+ 'criteria': '==',
+ 'value': 0,
+ 'format': wrk_formats['not_available']
+ }
+ )
+
# Conditional format to show the avaliable quantity is less than required.
- wks.conditional_format(row, start_col + columns['avail']['col'],
- row, start_col + columns['avail']['col'], {
+ wks.conditional_format(
+ row, start_col + columns['avail']['col'],
+ row, start_col + columns['avail']['col'],
+ {
'type': 'cell',
'criteria': '<',
- 'value': '=iferror(if({purch_qty}="",{needed_qty},{purch_qty}),"")'.format(
- needed_qty=xl_rowcol_to_cell(row, part_qty_col),
- purch_qty=xl_rowcol_to_cell(row, purch_qty_col)),
- 'format': wrk_formats['insufficient_qty']
- })
+ 'value': xl_rowcol_to_cell(row, part_qty_col),
+ 'format': wrk_formats['too_few_available']
+ }
+ )
+
+ # Conditional format to show the purchase quantity is more than what is available.
+ wks.conditional_format(
+ row, start_col + columns['purch']['col'],
+ row, start_col + columns['purch']['col'],
+ {
+ 'type': 'cell',
+ 'criteria': '>',
+ 'value': xl_rowcol_to_cell(row, avail_qty_col),
+ 'format': wrk_formats['order_too_much']
+ }
+ )
# Conditionally format the unit price cell that contains the best price.
wks.conditional_format(row, unit_price_col, row, unit_price_col, {
@@ -1324,13 +1331,13 @@ def add_dist_to_worksheet(wks, wrk_formats, index, start_row, start_col,
PART_INFO_LAST_ROW, total_cost_col)),
wrk_formats['total_cost_currency'])
- # Show the percentual of founded components.
+ # Show how many parts were found at this distributor.
wks.write(unit_cost_row, total_cost_col,
- '=(ROWS({count_range})-COUNTBLANK({count_range}))&"/"&ROWS({count_range})&" founded"'.format(
+ '=(ROWS({count_range})-COUNTBLANK({count_range}))&" of "&ROWS({count_range})&" parts found"'.format(
count_range=xl_range(PART_INFO_FIRST_ROW, total_cost_col,
PART_INFO_LAST_ROW, total_cost_col)),
- wrk_formats['founded_perc'])
- wks.write_comment(unit_cost_row, total_cost_col, 'Founded components in this distributor.')
+ wrk_formats['found_part_pct'])
+ wks.write_comment(unit_cost_row, total_cost_col, 'Number of parts found at this distributor.')
# Add list of part numbers and purchase quantities for ordering from this distributor.
ORDER_START_COL = start_col + 1
@@ -1448,15 +1455,19 @@ def enter_order_info(info_col, order_col, numeric=False, delimiter=''):
num_to_text_func=num_to_text_func,
num_to_text_fmt=num_to_text_fmt)))
- # Write the header and how many parts is purchasing.
+ # Write the header and how many parts are being purchased.
+ purch_qty_col = start_col + columns['purch']['col']
ORDER_HEADER = PART_INFO_LAST_ROW + 2
- wks.write(ORDER_HEADER, purch_qty_col,
- '=IFERROR(IF(OR({count_range}),"Purch cart: "&COUNTIF({count_range},">0")&"/"&ROWS({count_range})&" purchased",""),"")'.format(
- count_range=xl_range(PART_INFO_FIRST_ROW, purch_qty_col,
- PART_INFO_LAST_ROW, purch_qty_col)),
- wrk_formats['founded_perc'])
+ wks.write_formula(
+ ORDER_HEADER, purch_qty_col,
+ '=IFERROR(IF(OR({count_range}),COUNTIF({count_range},">0")&" of "&ROWS({count_range})&" parts purchased",""),"")'.format(
+ count_range=xl_range(PART_INFO_FIRST_ROW, purch_qty_col,
+ PART_INFO_LAST_ROW, purch_qty_col)
+ ),
+ wrk_formats['found_part_pct']
+ )
wks.write_comment(ORDER_HEADER, purch_qty_col,
- 'Copy the code bellow to the distributor web site importer.')
+ 'Copy the information below to the BOM import page of the distributor web site.')
# For every column in the order info range, enter the part order information.
for col_tag in ('purch', 'part_num', 'refs'):
@@ -1467,40 +1478,10 @@ def enter_order_info(info_col, order_col, numeric=False, delimiter=''):
return start_col + num_cols # Return column following the globals so we know where to start next set of cells.
-def get_user_agent():
- # The default user_agent_list comprises chrome, IE, firefox, Mozilla, opera, netscape.
- # for more user agent strings,you can find it in http://www.useragentstring.com/pages/useragentstring.php
- user_agent_list = [
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
- "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
- "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
- "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
- "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
- "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
- "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
- "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
- "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
- "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
- "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
- "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24"
- ]
- return user_agent_list[randint(0, len(user_agent_list) - 1)]
-
-
-def get_part_html_tree(part, dist, distributor_dict, local_part_html, logger):
+def get_part_html_tree(part, dist, get_html_tree_func, local_part_html, logger):
'''Get the HTML tree for a part from the given distributor website or local HTML.'''
logger.log(DEBUG_OBSESSIVE, '%s %s', dist, str(part.refs))
-
- # Get function name for getting the HTML tree for this part from this distributor.
- function = distributor_dict[dist]['function']
- get_dist_part_html_tree = THIS_MODULE['get_{}_part_html_tree'.format(function)]
for extra_search_terms in set([part.fields.get('manf', ''), '']):
try:
@@ -1509,7 +1490,7 @@ def get_part_html_tree(part, dist, distributor_dict, local_part_html, logger):
# 2) the manufacturer's part number.
for key in (dist+'#', dist+SEPRTR+'cat#', 'manf#'):
if key in part.fields:
- return get_dist_part_html_tree(dist, part.fields[key], extra_search_terms, local_part_html=local_part_html)
+ return get_html_tree_func(dist, part.fields[key], extra_search_terms, local_part_html=local_part_html)
# No distributor or manufacturer number, so give up.
else:
logger.warning("No '%s#' or 'manf#' field: cannot lookup part %s at %s", dist, part.refs, dist)
@@ -1546,19 +1527,18 @@ def scrape_part(args):
# Scrape the part data from each distributor website or the local HTML.
for d in distributor_dict:
- # Get the HTML tree for the part.
- html_tree, url[d] = get_part_html_tree(part, d, distributor_dict, local_part_html, scrape_logger)
+ try:
+ dist_module = getattr(distributor_imports, d)
+ except AttributeError:
+ dist_module = getattr(distributor_imports, distributor_dict[d]['module'])
- # Get the function names for getting the part data from the HTML tree.
- function = distributor_dict[d]['function']
- get_dist_price_tiers = THIS_MODULE['get_{}_price_tiers'.format(function)]
- get_dist_part_num = THIS_MODULE['get_{}_part_num'.format(function)]
- get_dist_qty_avail = THIS_MODULE['get_{}_qty_avail'.format(function)]
+ # Get the HTML tree for the part.
+ html_tree, url[d] = get_part_html_tree(part, d, dist_module.get_part_html_tree, local_part_html, scrape_logger)
# Call the functions that extract the data from the HTML tree.
- part_num[d] = get_dist_part_num(html_tree)
- qty_avail[d] = get_dist_qty_avail(html_tree)
- price_tiers[d] = get_dist_price_tiers(html_tree)
+ part_num[d] = dist_module.get_part_num(html_tree)
+ qty_avail[d] = dist_module.get_qty_avail(html_tree)
+ price_tiers[d] = dist_module.get_price_tiers(html_tree)
# Return the part data.
return id, url, part_num, price_tiers, qty_avail
|