# -*- coding: utf-8 -*-

__author__ = 'XESS Corporation'
__email__ = 'info@xess.com'

from random import randint

# Global constants for distributor site scraping.
import http.client  # For web scraping exceptions.
try:
    # Python 3 module layout.
    from urllib.parse import urlencode, quote as urlquote, urlsplit, urlunsplit
    import urllib.request
    from urllib.request import urlopen, Request
    from urllib.error import URLError
except ImportError:
    # Python 2 module layout. There, quote() and urlencode() live in urllib
    # (not urlparse) and URLError comes from urllib2.
    from urlparse import urlsplit, urlunsplit
    from urllib import urlencode, quote as urlquote
    from urllib2 import urlopen, Request, URLError

HTML_RESPONSE_RETRIES = 2  # Num of retries for getting part data web page.

# Exceptions treated as a failed page fetch by the distributor modules.
# URLError is bound by name in both import branches above so this tuple can be
# built no matter which branch was taken.
WEB_SCRAPE_EXCEPTIONS = (URLError, http.client.HTTPException)
# Pool of browser identification strings used to disguise scraping requests.
# Built once at import time instead of on every call.
# For more user agent strings, see
# http://www.useragentstring.com/pages/useragentstring.php
_USER_AGENTS = (
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
)


def get_user_agent():
    '''Return one of the known browser User-Agent strings, picked at random.'''
    # Function-scope import so this definition does not depend on any change
    # to the module's import section.
    from random import choice
    return choice(_USER_AGENTS)


def FakeBrowser(url):
    '''Build a request for url that carries browser-like headers.

    Args:
        url: Address of the page to fetch.

    Returns:
        A Request for url with an 'Accept-Language' header of 'en-US' and a
        randomly selected 'User-agent' header. The request is not sent here.
    '''
    req = Request(url)
    req.add_header('Accept-Language', 'en-US')
    req.add_header('User-agent', get_user_agent())
    return req


# The global dictionary of distributor information starts out empty.
distributors = {}

import os

# The distributor module directories will be found in this directory.
directory = os.path.dirname(__file__)

# Search for the distributor modules and import them.
for module in os.listdir(directory):

    # Avoid importing non-directories.
    abs_module = os.path.join(directory, module)
    if not os.path.isdir(abs_module):
        continue

    # Avoid directories like __pycache__.
    if module.startswith('__'):
        continue

    # Avoid stray directories that are not packages; trying to import one
    # would abort the import of this whole package.
    if not os.path.isfile(os.path.join(abs_module, '__init__.py')):
        continue

    # Import the module.
    __import__(module, globals(), locals(), [], level=1)
+ __import__(module, globals(), locals(), [], level=1) diff --git a/kicost/distributors/digikey/__init__.py b/kicost/distributors/digikey/__init__.py new file mode 100644 index 000000000..f5edaa7ec --- /dev/null +++ b/kicost/distributors/digikey/__init__.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- + +__author__ = 'XESS Corporation' +__email__ = 'info@xess.com' + +from .digikey import * + +# Place information about this distributor into the distributor dictionary. +from .. import distributors +distributors.update( + { + 'digikey': { + 'module': 'digikey', # The directory name containing this file. + 'scrape': 'web', # Allowable values: 'web' or 'local'. + 'label': 'Digi-Key', # Distributor label used in spreadsheet columns. + 'order_cols': ['purch', 'part_num', 'refs'], # Sort-order for online orders. + 'order_delimiter': ',', # Delimiter for online orders. + # Formatting for distributor header in worksheet. + 'wrk_hdr_format': { + 'font_size': 14, + 'font_color': 'white', + 'bold': True, + 'align': 'center', + 'valign': 'vcenter', + 'bg_color': '#CC0000' # Digi-Key red. + } + } + } +) + diff --git a/kicost/distributors/digikey/digikey.py b/kicost/distributors/digikey/digikey.py new file mode 100644 index 000000000..edec1f442 --- /dev/null +++ b/kicost/distributors/digikey/digikey.py @@ -0,0 +1,286 @@ +# MIT license +# +# Copyright (C) 2015 by XESS Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
def get_price_tiers(html_tree):
    '''Get the pricing tiers from the parsed tree of the Digikey product page.

    Args:
        html_tree: Parsed product page; searched for the table with
            id 'product-dollars'.

    Returns:
        A dict mapping each break quantity (int) to its unit price (float).
        The dict is empty when the page has no pricing table.
    '''
    price_tiers = {}
    try:
        rows = html_tree.find('table', id='product-dollars').find_all('tr')
    except AttributeError:
        # This happens when no pricing info is found in the tree.
        logger.log(DEBUG_OBSESSIVE, 'No Digikey pricing information found!')
        return price_tiers  # Return empty price tiers.

    for tr in rows:
        try:
            td = tr.find_all('td')
            qty = int(re.sub(r'[^0-9]', '', td[0].text))
            price_tiers[qty] = float(re.sub(r'[^0-9.]', '', td[1].text))
        except (TypeError, AttributeError, ValueError, IndexError):
            # Happens when a table row holds no usable quantity/price cells
            # (for example a header row), so just move on to the next row.
            continue
    return price_tiers
def part_is_reeled(html_tree):
    '''Returns True if this Digi-Key part is reeled or Digi-reeled.

    A part counts as reeled when its smallest price-break quantity is 100 or
    more, or when the page contains the 'product-details-reel-pricing' table.
    '''
    qty_tiers = list(get_price_tiers(html_tree).keys())
    if qty_tiers and min(qty_tiers) >= 100:
        return True
    return html_tree.find('table', id='product-details-reel-pricing') is not None


def get_part_num(html_tree):
    '''Get the part number from the Digikey product page.

    Returns:
        The text of the 'reportPartNumber' cell with all whitespace removed,
        or '' when the page has no such cell.
    '''
    try:
        return re.sub(r'\s', '', html_tree.find('td', id='reportPartNumber').text)
    except AttributeError:
        logger.log(DEBUG_OBSESSIVE, 'No Digikey part number found!')
        return ''


def get_qty_avail(html_tree):
    '''Get the available quantity of the part from the Digikey product page.

    Returns:
        The quantity as an int, 0 when the quantity section exists but holds
        nothing decipherable, or None when the page has no quantity section.
    '''
    try:
        qty_tree = html_tree.find('td', id='quantityAvailable').find('span', id='dkQty')
        qty_str = qty_tree.text
    except AttributeError:
        # No quantity found (not even 0) so this is probably a non-stocked part.
        # Return None so the part won't show in the spreadsheet for this dist.
        return None
    try:
        # Skip leading whitespace and require at least one digit. A pattern
        # that can match the empty string would succeed at position 0 and hide
        # a number that follows a newline or indentation.
        qty_str = re.match(r'\s*([0-9,]+)', qty_str).group(1)
        return int(re.sub(r'[^0-9]', '', qty_str))
    except (AttributeError, ValueError):
        # Didn't find the usual quantity text field. This might be one of those
        # input fields for requesting a quantity, so get the value from the
        # input field.
        try:
            logger.log(DEBUG_OBSESSIVE, 'No Digikey part quantity found!')
            return int(qty_tree.find('input', type='text').get('value'))
        except (AttributeError, ValueError, TypeError):
            # Well, there's a quantityAvailable section in the website, but
            # it doesn't contain anything decipherable. Let's just assume it's 0.
            # (TypeError covers an input field that has no value attribute.)
            return 0
def get_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None):
    '''Find the Digikey HTML page for a part number and return the parse tree and URL.

    Args:
        dist: Distributor name; only used in log messages here.
        pn: Part number to look for.
        extra_search_terms: Extra text appended to the search keywords.
        url: Page to start from. None means search the site by part number;
            a URL starting with '/' is taken relative to www.digikey.com.
        descend: How many more levels of linked pages may be followed.
        local_part_html: Not read by this function.

    Returns:
        A (tree, url) tuple: the parsed product page and the URL it came from.

    Raises:
        PartHtmlError: If no page could be read, the part number is absent
            from the page, the page cannot be parsed, or no product page
            could be identified.
    '''

    def merge_price_tiers(main_tree, alt_tree):
        '''Merge the price tiers from the alternate-packaging tree into the main tree.'''
        try:
            insertion_point = main_tree.find('table', id='product-dollars').find('tr')
            for tr in alt_tree.find('table', id='product-dollars').find_all('tr'):
                insertion_point.insert_after(tr)
        except AttributeError:
            logger.log(DEBUG_OBSESSIVE, 'Problem merging price tiers for Digikey part {} with alternate packaging!'.format(pn))

    def merge_qty_avail(main_tree, alt_tree):
        '''Merge the quantities from the alternate-packaging tree into the main tree.'''
        try:
            main_qty = get_qty_avail(main_tree)
            alt_qty = get_qty_avail(alt_tree)
            if main_qty is None:
                merged_qty = alt_qty
            elif alt_qty is None:
                merged_qty = main_qty
            else:
                merged_qty = max(main_qty, alt_qty)
            if merged_qty is not None:
                insertion_point = main_tree.find('td', id='quantityAvailable').find('span', id='dkQty')
                insertion_point.string = '{}'.format(merged_qty)
        except AttributeError:
            logger.log(DEBUG_OBSESSIVE, 'Problem merging available quantities for Digikey part {} with alternate packaging!'.format(pn))

    # Use the part number to lookup the part using the site search function, unless a starting url was given.
    if url is None:
        url = 'http://www.digikey.com/scripts/DkSearch/dksus.dll?WT.z_header=search_go&lang=en&keywords=' + urlquote(
            pn + ' ' + extra_search_terms,
            safe='')
    elif url.startswith('/'):  # startswith() also copes with an empty URL.
        url = 'http://www.digikey.com' + url

    # Open the URL and read the HTML from it, retrying a limited number of times.
    req = FakeBrowser(url)
    for _ in range(HTML_RESPONSE_RETRIES):
        try:
            response = urlopen(req)
            try:
                html = response.read()
            finally:
                response.close()  # Don't leak the connection.
            break
        except WEB_SCRAPE_EXCEPTIONS:
            logger.log(DEBUG_DETAILED, 'Exception while web-scraping {} from {}'.format(pn, dist))
    else:  # Couldn't get a good read from the website.
        logger.log(DEBUG_OBSESSIVE, 'No HTML page for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # Abort if the part number isn't in the HTML somewhere.
    # (Only use the numbers and letters to compare PN to HTML.)
    if re.sub(r'[\W_]', '', str.lower(pn)) not in re.sub(r'[\W_]', '', str.lower(str(html))):
        logger.log(DEBUG_OBSESSIVE, 'No part number {} in HTML page from {}'.format(pn, dist))
        raise PartHtmlError

    # Parse the HTML into a tree structure.
    try:
        tree = BeautifulSoup(html, 'lxml')
    except Exception:
        logger.log(DEBUG_OBSESSIVE, 'No HTML tree for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # If the tree contains the tag for a product page, then return it.
    if tree.find('div', class_='product-top-section') is not None:

        # Digikey separates cut-tape and reel packaging, so we need to examine more pages
        # to get all the pricing info. But don't descend any further if limit has been reached.
        if descend > 0:
            try:
                # Find all the URLs to alternate-packaging pages for this part.
                ap_urls = [
                    ap.find('li', class_='lnkAltPack').find_all('a')[-1].get('href')
                    for ap in tree.find(
                        'div', class_='bota',
                        id='additionalPackaging').find_all(
                            'ul', class_='more-expander-item')
                ]
                logger.log(DEBUG_OBSESSIVE, 'Found {} alternate packagings for {} from {}'.format(len(ap_urls), pn, dist))

                # Fetch each alternate page on its own so that one bad page
                # does not throw away the pages that were read successfully.
                ap_trees_and_urls = []
                for ap_url in ap_urls:
                    try:
                        ap_trees_and_urls.append(
                            get_part_html_tree(dist, pn, extra_search_terms, ap_url, descend=0))
                    except Exception:
                        logger.log(DEBUG_OBSESSIVE, 'Failed to find alternate packagings for {} from {}'.format(pn, dist))

                # Put the main tree on the list as well and then look through
                # the entire list for one that's non-reeled. Use this as the
                # main page for the part.
                ap_trees_and_urls.append((tree, url))
                if part_is_reeled(tree):
                    for ap_tree, ap_url in ap_trees_and_urls:
                        if not part_is_reeled(ap_tree):
                            # Found a non-reeled part, so use it as the main page.
                            tree = ap_tree
                            url = ap_url
                            break  # Done looking.

                # Now go through the other pages, merging their pricing and quantity
                # info into the main page. (Both merge helpers log and swallow
                # AttributeError themselves.)
                for ap_tree, ap_url in ap_trees_and_urls:
                    if ap_tree is tree:
                        continue  # Skip examining the main tree. It already contains its info.
                    # Merge the pricing info into the main parse tree to make
                    # a single, unified set of price tiers...
                    merge_price_tiers(tree, ap_tree)
                    # and merge available quantity, using the maximum found.
                    merge_qty_avail(tree, ap_tree)
            except AttributeError:
                logger.log(DEBUG_OBSESSIVE, 'Problem parsing URLs from product page for {} from {}'.format(pn, dist))

        return tree, url  # Return the parse tree and the URL where it came from.

    # If the tree is for a list of products, then examine the links to try to find the part number.
    if tree.find('table', id='productTable') is not None:
        logger.log(DEBUG_OBSESSIVE, 'Found product table for {} from {}'.format(pn, dist))
        if descend <= 0:
            logger.log(DEBUG_OBSESSIVE, 'Passed descent limit for {} from {}'.format(pn, dist))
            raise PartHtmlError

        # Look for the table of products.
        try:
            products = tree.find('table', id='productTable').find('tbody').find_all('tr')
        except AttributeError:
            # The table has no body to look through.
            logger.log(DEBUG_OBSESSIVE, 'Unknown error for {} from {}'.format(pn, dist))
            raise PartHtmlError

        # Extract the product links for the part numbers from the table.
        # Extract links for both manufacturer and catalog numbers, skipping
        # rows that lack the cell or the link.
        product_links = []
        for cell_class in ('tr-mfgPartNumber', 'tr-dkPartNumber'):
            for p in products:
                cell = p.find('td', class_=cell_class)
                link = cell.a if cell is not None else None
                if link is not None:
                    product_links.append(link)

        # Extract all the part numbers from the text portion of the links.
        part_numbers = [l.text for l in product_links]

        # Look for the part number in the list that most closely matches the requested part number.
        matches = difflib.get_close_matches(pn, part_numbers, 1, 0.0)
        if not matches:
            logger.log(DEBUG_OBSESSIVE, 'Unknown error for {} from {}'.format(pn, dist))
            raise PartHtmlError
        match = matches[0]

        # Now look for the link that goes with the closest matching part number.
        for l in product_links:
            if l.text == match:
                # Get the tree for the linked-to page and return that.
                logger.log(DEBUG_OBSESSIVE, 'Selecting {} from product table for {} from {}'.format(l.text, pn, dist))
                return get_part_html_tree(dist, pn, extra_search_terms,
                                          url=l['href'],
                                          descend=descend - 1)

    # If the HTML contains a list of part categories, then give up.
    if tree.find('form', id='keywordSearchForm') is not None:
        logger.log(DEBUG_OBSESSIVE, 'Found high-level part categories for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # I don't know what happened here, so give up.
    logger.log(DEBUG_OBSESSIVE, 'Unknown error for {} from {}'.format(pn, dist))
    raise PartHtmlError
+ logger.log(DEBUG_OBSESSIVE,'Unknown error for {} from {}'.format(pn, dist)) + raise PartHtmlError diff --git a/kicost/distributors/farnell/__init__.py b/kicost/distributors/farnell/__init__.py new file mode 100644 index 000000000..ff6eca676 --- /dev/null +++ b/kicost/distributors/farnell/__init__.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- + +__author__='Giacinto Luigi Cerone' + +from .farnell import * + +# Place information about this distributor into the distributor dictionary. +from .. import distributors +distributors.update( + { + 'farnell': { + 'module': 'farnell', # The directory name containing this file. + 'scrape': 'web', # Allowable values: 'web' or 'local'. + 'label': 'Farnell', # Distributor label used in spreadsheet columns. + 'order_cols': ['part_num', 'purch', 'refs'], # Sort-order for online orders. + 'order_delimiter': ' ', # Delimiter for online orders. + # Formatting for distributor header in worksheet. + 'wrk_hdr_format': { + 'font_size': 14, + 'font_color': 'white', + 'bold': True, + 'align': 'center', + 'valign': 'vcenter', + 'bg_color': '#FF6600' # Farnell/E14 orange. + } + } + } +) diff --git a/kicost/distributors/farnell/farnell.py b/kicost/distributors/farnell/farnell.py new file mode 100644 index 000000000..9bee57415 --- /dev/null +++ b/kicost/distributors/farnell/farnell.py @@ -0,0 +1,181 @@ +# Inserted by Pasteurize tool. +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from builtins import zip +from builtins import range +from builtins import int +from builtins import str +from future import standard_library +standard_library.install_aliases() + +import future + +import re +import difflib +from bs4 import BeautifulSoup +import http.client # For web scraping exceptions. +from .. import urlquote, urlsplit, urlunsplit, urlopen, Request +from .. import HTML_RESPONSE_RETRIES +from .. import WEB_SCRAPE_EXCEPTIONS +from .. 
def get_price_tiers(html_tree):
    '''Get the pricing tiers from the parsed tree of the farnell product page.

    Args:
        html_tree: Parsed product page; searched for the pricing table.

    Returns:
        A dict mapping each break quantity (int) to its unit price (float)
        after conversion from EUR to USD. The dict is empty when the page has
        no pricing table.
    '''
    price_tiers = {}
    try:
        # Locate the pricing table once and pull both columns from it.
        table = html_tree.find('table', class_=('tableProductDetailPrice', 'pricing'))
        qty_strs = [qty.text for qty in table.find_all('td', class_='qty')]
        price_strs = [price.text for price in table.find_all('td', class_='threeColTd')]
    except AttributeError:
        # This happens when no pricing info is found in the tree.
        return price_tiers  # Return empty price tiers.

    for qty_str, price_str in zip(qty_strs, price_strs):
        try:
            qty = re.search(r'(\s*)([0-9,]+)', qty_str).group(2)
            qty = int(re.sub(r'[^0-9]', '', qty))
            # Prices use a decimal comma, so turn it into a decimal point.
            price = float(re.sub(r'[^0-9.]', '', price_str.replace(',', '.')))
            # Store the tier only after the conversion has succeeded so an
            # unconverted EUR amount is never left in the result.
            price_tiers[qty] = currency.convert(price, 'EUR', 'USD')
        except (TypeError, AttributeError, ValueError):
            continue
    return price_tiers


def get_part_num(html_tree):
    '''Get the part number from the farnell product page.

    Returns:
        The description stored under the 'CodiceProdotto' term of the product
        description list, or '' when the list or that term is missing.
    '''
    try:
        # farnell catalog number is stored in a description list, so get
        # all the list terms and descriptions, strip all the spaces from those,
        # and pair them up.
        div = html_tree.find('div', class_='productDescription').find('dl')
        dt = [re.sub(r'\s', '', d.text) for d in div.find_all('dt')]
        dd = [re.sub(r'\s', '', d.text) for d in div.find_all('dd')]
        dtdd = dict(zip(dt, dd))  # Pair terms with descriptions.
        return dtdd['CodiceProdotto']
    except (KeyError, AttributeError):
        # No catalog number or no product description found in page.
        return ''
def get_qty_avail(html_tree):
    '''Get the available quantity of the part from the farnell product page.

    Returns:
        The digits of the 'availabilityHeading' paragraph as an int, or None
        when the paragraph is missing or contains no digits.
    '''
    try:
        qty_str = html_tree.find('p', class_='availabilityHeading').text
    except (AttributeError, ValueError):
        # No quantity found (not even 0) so this is probably a non-stocked part.
        # Return None so the part won't show in the spreadsheet for this dist.
        return None
    try:
        # Strip all non-number chars and return an integer for the quantity.
        return int(re.sub(r'[^0-9]', '', qty_str))
    except ValueError:
        # No quantity found (not even 0) so this is probably a non-stocked part.
        # Return None so the part won't show in the spreadsheet for this dist.
        return None


def get_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None):
    '''Find the farnell HTML page for a part number and return the parse tree and URL.

    Args:
        dist: Distributor name; only used in log messages here.
        pn: Part number to look for.
        extra_search_terms: Extra text appended to the search keywords.
        url: Page to start from. None means search the site by part number.
        descend: How many more levels of linked pages may be followed.
        local_part_html: Not read by this function.

    Returns:
        A (tree, url) tuple: the parsed product page and the URL it came from.

    Raises:
        PartHtmlError: If no page could be read, the part number is absent
            from the page, the page cannot be parsed, or no product page
            could be identified.
    '''

    # Use the part number to lookup the part using the site search function, unless a starting url was given.
    # NOTE(review): the search goes to it.farnell.com while relative links are
    # resolved against www.farnell.com -- confirm this mix is intended.
    if url is None:
        url = 'http://it.farnell.com/webapp/wcs/stores/servlet/Search?catalogId=15001&langId=-4&storeId=10165&gs=true&st=' + urlquote(
            pn + ' ' + extra_search_terms,
            safe='')
    elif url.startswith('/'):  # startswith() also copes with an empty URL.
        url = 'http://www.farnell.com' + url
    elif url.startswith('..'):
        url = 'http://www.farnell.com/Search/' + url

    # Open the URL and read the HTML from it, retrying a limited number of times.
    for _ in range(HTML_RESPONSE_RETRIES):
        try:
            req = FakeBrowser(url)
            response = urlopen(req)
            try:
                html = response.read()
            finally:
                response.close()  # Don't leak the connection.
            break
        except WEB_SCRAPE_EXCEPTIONS:
            logger.log(DEBUG_DETAILED, 'Exception while web-scraping {} from {}'.format(pn, dist))
    else:  # Couldn't get a good read from the website.
        logger.log(DEBUG_OBSESSIVE, 'No HTML page for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # Abort if the part number isn't in the HTML somewhere.
    # (Only use the numbers and letters to compare PN to HTML.)
    if re.sub(r'[\W_]', '', str.lower(pn)) not in re.sub(r'[\W_]', '', str.lower(str(html))):
        logger.log(DEBUG_OBSESSIVE, 'No part number {} in HTML page from {}'.format(pn, dist))
        raise PartHtmlError

    # Parse the HTML into a tree structure.
    try:
        tree = BeautifulSoup(html, 'lxml')
    except Exception:
        logger.log(DEBUG_OBSESSIVE, 'No HTML tree for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # If the tree contains the tag for a product page, then just return it.
    if tree.find('div', class_='productDisplay', id='page') is not None:
        return tree, url

    # If the tree is for a list of products, then examine the links to try to find the part number.
    if tree.find('table', class_='productLister', id='sProdList') is not None:
        logger.log(DEBUG_OBSESSIVE, 'Found product table for {} from {}'.format(pn, dist))
        if descend <= 0:
            logger.log(DEBUG_OBSESSIVE, 'Passed descent limit for {} from {}'.format(pn, dist))
            raise PartHtmlError

        # Look for the table of products.
        products = tree.find('table',
                             class_='productLister',
                             id='sProdList').find_all('tr',
                                                      class_='altRow')

        # Extract the product links for the part numbers from the table,
        # skipping rows that lack the part-number cell or its link.
        product_links = []
        for p in products:
            cell = p.find('td', class_='mftrPart')
            link = cell.find('a') if cell is not None else None
            if link is not None:
                product_links.append(link)

        # Extract all the part numbers from the text portion of the links.
        part_numbers = [l.text for l in product_links]

        # Look for the part number in the list that most closely matches the requested part number.
        matches = difflib.get_close_matches(pn, part_numbers, 1, 0.0)
        if not matches:
            # The table held no usable links, so there is nothing to follow.
            logger.log(DEBUG_OBSESSIVE, 'Unknown error for {} from {}'.format(pn, dist))
            raise PartHtmlError
        match = matches[0]

        # Now look for the link that goes with the closest matching part number.
        for l in product_links:
            if l.text == match:
                # Get the tree for the linked-to page and return that.
                logger.log(DEBUG_OBSESSIVE, 'Selecting {} from product table for {} from {}'.format(l.text, pn, dist))
                return get_part_html_tree(dist, pn, extra_search_terms,
                                          url=l['href'], descend=descend - 1)

    # I don't know what happened here, so give up.
    logger.log(DEBUG_OBSESSIVE, 'Unknown error for {} from {}'.format(pn, dist))
    raise PartHtmlError
+ logger.log(DEBUG_OBSESSIVE,'Selecting {} from product table for {} from {}'.format(l.text, pn, dist)) + return get_part_html_tree(dist, pn, extra_search_terms, + url=l['href'], descend=descend-1) + + # I don't know what happened here, so give up. + logger.log(DEBUG_OBSESSIVE,'Unknown error for {} from {}'.format(pn, dist)) + raise PartHtmlError diff --git a/kicost/distributors/local/__init__.py b/kicost/distributors/local/__init__.py new file mode 100644 index 000000000..0525c1b1c --- /dev/null +++ b/kicost/distributors/local/__init__.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- + +__author__ = 'XESS Corporation' +__email__ = 'info@xess.com' + +from .local import * + +# Place information about this distributor into the distributor dictionary. +from .. import distributors +distributors.update( + { + 'local_template': { + 'module': 'local', # The directory name containing this file. + 'scrape': 'local', # Allowable values: 'web' or 'local'. + 'label': 'Local', # Distributor label used in spreadsheet columns. + 'order_cols': ['part_num', 'purch', 'refs'], # Sort-order for online orders. + 'order_delimiter': ' ', # Delimiter for online orders. + # Formatting for distributor header in worksheet. + 'wrk_hdr_format': { + 'font_size': 14, + 'font_color': 'white', + 'bold': True, + 'align': 'center', + 'valign': 'vcenter', + 'bg_color': '#008000' # Darker green. 
+ } + } + } +) diff --git a/kicost/distributors/local/local.py b/kicost/distributors/local/local.py new file mode 100644 index 000000000..5d7add0f5 --- /dev/null +++ b/kicost/distributors/local/local.py @@ -0,0 +1,114 @@ +# MIT license +# +# Copyright (C) 2015 by XESS Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +# Inserted by Pasteurize tool. +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from builtins import zip +from builtins import range +from builtins import int +from builtins import str +from future import standard_library +standard_library.install_aliases() + +import future + +import re +import difflib +from bs4 import BeautifulSoup +import http.client # For web scraping exceptions. +from .. import urlquote, urlsplit, urlunsplit, urlopen, Request +from .. import HTML_RESPONSE_RETRIES +from .. import WEB_SCRAPE_EXCEPTIONS +from .. 
def get_price_tiers(html_tree):
    '''Get the pricing tiers from the parsed tree of the local product page.

    The text of the 'pricing' DIV is read as ';'-separated tiers, each one a
    quantity and a price joined by SEPRTR.

    Returns:
        A dict mapping each break quantity (int) to its unit price (float).
        Empty or malformed tiers are skipped; the dict is empty when the page
        has no pricing DIV.
    '''
    price_tiers = {}
    try:
        pricing = html_tree.find('div', class_='pricing').text
    except AttributeError:
        # This happens when no pricing info is found in the tree.
        logger.log(DEBUG_OBSESSIVE, 'No local pricing information found!')
        return price_tiers  # Return empty price tiers.

    pricing = re.sub(r'[^0-9.;:]', '', pricing)  # Keep only digits, decimals, delimiters.
    for qty_price in pricing.split(';'):
        try:
            qty, price = qty_price.split(SEPRTR)
            price_tiers[int(qty)] = float(price)
        except ValueError:
            # An empty piece (e.g. after a trailing ';') or a tier that is not
            # a quantity/price pair: skip it and keep the usable tiers.
            continue
    return price_tiers


def get_part_num(html_tree):
    '''Get the part number from the local product page.

    Returns:
        The text of the 'cat#' DIV, or '' when the page has no such DIV.
    '''
    try:
        return html_tree.find('div', class_='cat#').text
    except AttributeError:
        return ''


def get_qty_avail(html_tree):
    '''Get the available quantity of the part from the local product page.

    Returns:
        The digits of the 'quantity' DIV as an int, or 0 when the DIV is
        missing or contains no digits.
    '''
    try:
        qty_str = html_tree.find('div', class_='quantity').text
    except (AttributeError, ValueError):
        # Return 0 (not None) so this part will show in the spreadsheet
        # even if there is no quantity found.
        return 0
    try:
        return int(re.sub(r'[^0-9]', '', qty_str))
    except ValueError:
        # Return 0 (not None) so this part will show in the spreadsheet
        # even if there is no quantity found.
        logger.log(DEBUG_OBSESSIVE, 'No local part quantity found!')
        return 0


def get_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=None, local_part_html=None):
    '''Extract the HTML tree from the HTML page for local parts.

    Args:
        dist: Distributor name; combined with pn to form the class of the
            DIV that holds the part data.
        pn: Part number.
        extra_search_terms: Not read by this function.
        url: Not read by this function.
        descend: Not read by this function.
        local_part_html: HTML text holding the local part data.

    Returns:
        A (part_tree, url) tuple, where url is the stripped text of the part's
        'link' DIV or None when the part has no such DIV.

    Raises:
        PartHtmlError: If the HTML cannot be parsed or holds no DIV for this
            distributor and part number.
    '''

    # Extract the HTML tree from the local part HTML page.
    try:
        tree = BeautifulSoup(local_part_html, 'lxml')
    except Exception:
        raise PartHtmlError

    # Find the DIV in the tree for the given part and distributor.
    part_tree = tree.find('div', class_=dist + SEPRTR + pn)
    if part_tree is None:
        # Return an error if the part_tree is not found.
        raise PartHtmlError

    # Return the part data tree and any URL associated with the part,
    # or None in place of the URL if it is not found.
    url_tree = part_tree.find('div', class_='link')
    if url_tree is None:
        return part_tree, None
    return part_tree, url_tree.text.strip()
+ class_ = dist + SEPRTR + pn + part_tree = tree.find('div', class_=class_) + url_tree = part_tree.find('div', class_='link') + try: + # Return the part data tree and any URL associated with the part. + return part_tree, url_tree.text.strip() + except AttributeError: + # Return part data tree and None if the URL is not found. + return part_tree, None + except AttributeError: + # Return an error if the part_tree is not found. + raise PartHtmlError diff --git a/kicost/distributors/mouser/__init__.py b/kicost/distributors/mouser/__init__.py new file mode 100644 index 000000000..741de6fc3 --- /dev/null +++ b/kicost/distributors/mouser/__init__.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- + +__author__ = 'XESS Corporation' +__email__ = 'info@xess.com' + +from .mouser import * + +# Place information about this distributor into the distributor dictionary. +from .. import distributors +distributors.update( + { + 'mouser': { + 'module': 'mouser', # The directory name containing this file. + 'scrape': 'web', # Allowable values: 'web' or 'local'. + 'label': 'Mouser', # Distributor label used in spreadsheet columns. + 'order_cols': ['part_num', 'purch', 'refs'], # Sort-order for online orders. + 'order_delimiter': ' ', # Delimiter for online orders. + # Formatting for distributor header in worksheet. + 'wrk_hdr_format': { + 'font_size': 14, + 'font_color': 'white', + 'bold': True, + 'align': 'center', + 'valign': 'vcenter', + 'bg_color': '#004A85' # Mouser blue. 
+ } + } + } +) diff --git a/kicost/distributors/mouser/mouser.py b/kicost/distributors/mouser/mouser.py new file mode 100644 index 000000000..3bb4f656c --- /dev/null +++ b/kicost/distributors/mouser/mouser.py @@ -0,0 +1,191 @@ +# MIT license +# +# Copyright (C) 2015 by XESS Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +# Inserted by Pasteurize tool. +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from builtins import zip +from builtins import range +from builtins import int +from builtins import str +from future import standard_library +standard_library.install_aliases() + +import future + +import re +import difflib +from bs4 import BeautifulSoup +import http.client # For web scraping exceptions. +from .. import urlquote, urlsplit, urlunsplit, urlopen, Request +from .. import HTML_RESPONSE_RETRIES +from .. 
def get_price_tiers(html_tree):
    '''Get the pricing tiers from the parsed tree of the Mouser product page.

    Args:
        html_tree: Parsed product page. Only find()/find_all() and the
            .text of the returned elements are used.

    Returns:
        A dict mapping each price-break quantity (int) to its unit price
        (float). Empty when the page has no 'PriceBreaks' div.
    '''
    price_tiers = {}
    try:
        price_breaks = html_tree.find('div', class_='PriceBreaks')
        qty_strs = [qty.text for qty in
                    price_breaks.find_all('div', class_='PriceBreakQuantity')]
        price_strs = [price.text for price in
                      price_breaks.find_all('div', class_='PriceBreakPrice')]
    except AttributeError:
        # This happens when no pricing info is found in the tree.
        logger.log(DEBUG_OBSESSIVE, 'No Mouser pricing information found!')
        return price_tiers  # Return empty price tiers.

    for qty_str, price_str in zip(qty_strs, price_strs):
        try:
            qty = re.search(r'(\s*)([0-9,]+)', qty_str).group(2)
            qty = int(re.sub(r'[^0-9]', '', qty))
            price_tiers[qty] = float(re.sub(r'[^0-9\.]', '', price_str))
        except (TypeError, AttributeError, ValueError, IndexError):
            # Skip any row whose quantity or price can't be parsed.
            continue
    return price_tiers


def get_part_num(html_tree):
    '''Get the part number from the Mouser product page.

    Returns:
        The text of the 'divMouserPartNum' div with newlines removed, or
        '' when that div is not in the page.
    '''
    try:
        return html_tree.find('div', id='divMouserPartNum').text.replace('\n', '')
    except AttributeError:
        logger.log(DEBUG_OBSESSIVE, 'No Mouser part number found!')
        return ''


def get_qty_avail(html_tree):
    '''Get the available quantity of the part from the Mouser product page.

    Returns:
        The quantity as an int, or None when the page shows no quantity.
    '''
    try:
        qty_str = html_tree.find(
            'div', id='availability').find(
            'div', class_='av-row').find(
            'div', class_='av-col2').text
    except AttributeError:
        # No quantity found (not even 0) so this is probably a non-stocked part.
        # Return None so the part won't show in the spreadsheet for this dist.
        logger.log(DEBUG_OBSESSIVE, 'No Mouser part quantity found!')
        return None
    try:
        # The pattern can match an empty string, so a missing number shows
        # up as int('') raising ValueError below.
        qty_str = re.search(r'(\s*)([0-9,]*)', qty_str).group(2)
        return int(re.sub(r'[^0-9]', '', qty_str))
    except ValueError:
        # No quantity found (not even 0) so this is probably a non-stocked part.
        # Return None so the part won't show in the spreadsheet for this dist.
        logger.log(DEBUG_OBSESSIVE, 'No Mouser part quantity found!')
        return None


def get_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None):
    '''Find the Mouser HTML page for a part number and return the parse tree and URL.

    Args:
        dist: Distributor name; only used in log messages.
        pn: Part number to look up.
        extra_search_terms: Extra text appended to the search keywords.
        url: Page to start from. When None, the Mouser search page for pn is
            used; site-relative URLs ('/...' or '..') are made absolute.
        descend: How many more search-result pages may be followed.
        local_part_html: Not used by this function.

    Returns:
        A (tree, url) tuple: the parsed product page and its URL.

    Raises:
        PartHtmlError: If the page can't be fetched or parsed, doesn't
            mention pn, or no product page is reached.
    '''
    # Use the part number to lookup the part using the site search function,
    # unless a starting url was given.
    if url is None:
        url = 'http://www.mouser.com/Search/Refine.aspx?Keyword=' + urlquote(
            pn + ' ' + extra_search_terms,
            safe='')
    elif url.startswith('/'):
        url = 'http://www.mouser.com' + url
    elif url.startswith('..'):
        url = 'http://www.mouser.com/Search/' + url

    # Open the URL, read the HTML from it, and parse it into a tree structure.
    req = FakeBrowser(url)
    req.add_header('Cookie', 'preferences=ps=www2&pl=en-US&pc_www2=USDe')
    for _ in range(HTML_RESPONSE_RETRIES):
        try:
            response = urlopen(req)
            html = response.read()
            break
        except WEB_SCRAPE_EXCEPTIONS:
            logger.log(DEBUG_DETAILED, 'Exception while web-scraping {} from {}'.format(pn, dist))
    else:  # Couldn't get a good read from the website.
        logger.log(DEBUG_OBSESSIVE, 'No HTML page for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # Abort if the part number isn't in the HTML somewhere.
    # (Only use the numbers and letters to compare PN to HTML.)
    if re.sub(r'[\W_]', '', str.lower(pn)) not in re.sub(r'[\W_]', '', str.lower(str(html))):
        logger.log(DEBUG_OBSESSIVE, 'No part number {} in HTML page from {}'.format(pn, dist))
        raise PartHtmlError

    try:
        tree = BeautifulSoup(html, 'lxml')
    except Exception:
        logger.log(DEBUG_OBSESSIVE, 'No HTML tree for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # If the tree contains the tag for a product page, then just return it.
    if tree.find('div', id='product-details') is not None:
        return tree, url

    # If the tree is for a list of products, then examine the links to try to
    # find the part number.
    results_table = tree.find('table', class_='SearchResultsTable')
    if results_table is not None:
        logger.log(DEBUG_OBSESSIVE, 'Found product table for {} from {}'.format(pn, dist))
        if descend <= 0:
            logger.log(DEBUG_OBSESSIVE, 'Passed descent limit for {} from {}'.format(pn, dist))
            raise PartHtmlError

        # Look for the table of products.
        products = results_table.find_all(
            'tr',
            class_=('SearchResultsRowOdd', 'SearchResultsRowEven'))

        # Extract the product links for the part numbers from the table,
        # skipping rows that have no manufacturer-part link.
        product_links = []
        for p in products:
            try:
                link = p.find('div', class_='mfrDiv').a
            except AttributeError:
                continue
            if link is not None:
                product_links.append(link)

        # Extract all the part numbers from the text portion of the links.
        part_numbers = [l.text for l in product_links]

        # Look for the part number in the list that most closely matches the
        # requested part number. The list is empty when no row had a link.
        try:
            match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]
        except IndexError:
            logger.log(DEBUG_OBSESSIVE, 'No product links found for {} from {}'.format(pn, dist))
            raise PartHtmlError

        # Now look for the link that goes with the closest matching part number.
        for l in product_links:
            if l.text == match:
                # Get the tree for the linked-to page and return that.
                logger.log(DEBUG_OBSESSIVE, 'Selecting {} from product table for {} from {}'.format(l.text, pn, dist))
                return get_part_html_tree(dist, pn, extra_search_terms,
                                          url=l['href'], descend=descend-1)

    # I don't know what happened here, so give up.
    logger.log(DEBUG_OBSESSIVE, 'Unknown error for {} from {}'.format(pn, dist))
    raise PartHtmlError
+ } + } + } +) diff --git a/kicost/distributors/newark/newark.py b/kicost/distributors/newark/newark.py new file mode 100644 index 000000000..6090eca58 --- /dev/null +++ b/kicost/distributors/newark/newark.py @@ -0,0 +1,202 @@ +# MIT license +# +# Copyright (C) 2015 by XESS Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +# Inserted by Pasteurize tool. +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from builtins import zip +from builtins import range +from builtins import int +from builtins import str +from future import standard_library +standard_library.install_aliases() + +import future + +import re +import difflib +from bs4 import BeautifulSoup +import http.client # For web scraping exceptions. +from .. import urlquote, urlsplit, urlunsplit, urlopen, Request +from .. import HTML_RESPONSE_RETRIES +from .. 
def get_price_tiers(html_tree):
    '''Get the pricing tiers from the parsed tree of the Newark product page.

    Args:
        html_tree: Parsed product page. Only find()/find_all() and the
            .text of the returned elements are used.

    Returns:
        A dict mapping each price-break quantity (int) to its unit price
        (float). Empty when the page has no pricing table.
    '''
    price_tiers = {}
    try:
        price_table = html_tree.find(
            'table',
            class_=('tableProductDetailPrice', 'pricing'))
        qty_strs = [qty.text for qty in price_table.find_all('td', class_='qty')]
        price_strs = [price.text for price in
                      price_table.find_all('td', class_='threeColTd')]
    except AttributeError:
        # This happens when no pricing info is found in the tree.
        logger.log(DEBUG_OBSESSIVE, 'No Newark pricing information found!')
        return price_tiers  # Return empty price tiers.

    for qty_str, price_str in zip(qty_strs, price_strs):
        try:
            qty = re.search(r'(\s*)([0-9,]+)', qty_str).group(2)
            qty = int(re.sub(r'[^0-9]', '', qty))
            price_tiers[qty] = float(re.sub(r'[^0-9\.]', '', price_str))
        except (TypeError, AttributeError, ValueError):
            # Skip any row whose quantity or price can't be parsed.
            continue
    return price_tiers


def get_part_num(html_tree):
    '''Get the part number from the Newark product page.

    Returns:
        The description-list entry labelled 'Newark Part No.:' with all
        whitespace removed, or '' when it can't be found.
    '''
    try:
        # Newark catalog number is stored in a description list, so get
        # all the list terms and descriptions, strip all the spaces from those,
        # and pair them up.
        div = html_tree.find('div', class_='productDescription').find('dl')
        dt = [re.sub(r'\s', '', d.text) for d in div.find_all('dt')]
        dd = [re.sub(r'\s', '', d.text) for d in div.find_all('dd')]
        dtdd = dict(zip(dt, dd))  # Pair terms with descriptions.
        return dtdd['NewarkPartNo.:']
    except KeyError:
        logger.log(DEBUG_OBSESSIVE, 'No Newark catalog number found!')
        return ''  # No catalog number found in page.
    except AttributeError:
        logger.log(DEBUG_OBSESSIVE, 'No Newark product description found!')
        return ''  # No ProductDescription found in page.


def get_qty_avail(html_tree):
    '''Get the available quantity of the part from the Newark product page.

    Returns:
        The quantity as an int, or None when the page shows no quantity.
    '''
    try:
        qty_str = html_tree.find('p', class_='availabilityHeading').text
    except (AttributeError, ValueError):
        # No quantity found (not even 0) so this is probably a non-stocked part.
        # Return None so the part won't show in the spreadsheet for this dist.
        return None
    try:
        # Strip all non-number chars and return an integer for the quantity.
        return int(re.sub(r'[^0-9]', '', qty_str))
    except ValueError:
        # No quantity found (not even 0) so this is probably a non-stocked part.
        # Return None so the part won't show in the spreadsheet for this dist.
        logger.log(DEBUG_OBSESSIVE, 'No Newark part quantity found!')
        return None


def get_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None):
    '''Find the Newark HTML page for a part number and return the parse tree and URL.

    Args:
        dist: Distributor name; only used in log messages.
        pn: Part number to look up.
        extra_search_terms: Extra text appended to the search keywords.
        url: Page to start from. When None, the Newark search page for pn is
            used; site-relative URLs ('/...' or '..') are made absolute.
        descend: How many more search-result pages may be followed.
        local_part_html: Not used by this function.

    Returns:
        A (tree, url) tuple: the parsed product page and its URL.

    Raises:
        PartHtmlError: If the page can't be fetched or parsed, doesn't
            mention pn, or no product page is reached.
    '''
    # Use the part number to lookup the part using the site search function,
    # unless a starting url was given.
    if url is None:
        url = 'http://www.newark.com/webapp/wcs/stores/servlet/Search?catalogId=15003&langId=-1&storeId=10194&gs=true&st=' + urlquote(
            pn + ' ' + extra_search_terms,
            safe='')
    elif url.startswith('/'):
        url = 'http://www.newark.com' + url
    elif url.startswith('..'):
        url = 'http://www.newark.com/Search/' + url

    # Open the URL, read the HTML from it, and parse it into a tree structure.
    for _ in range(HTML_RESPONSE_RETRIES):
        try:
            req = FakeBrowser(url)
            response = urlopen(req)
            html = response.read()
            break
        except WEB_SCRAPE_EXCEPTIONS:
            logger.log(DEBUG_DETAILED, 'Exception while web-scraping {} from {}'.format(pn, dist))
    else:  # Couldn't get a good read from the website.
        logger.log(DEBUG_OBSESSIVE, 'No HTML page for {} from {}'.format(pn, dist))
        raise PartHtmlError

    try:
        tree = BeautifulSoup(html, 'lxml')
    except Exception:
        logger.log(DEBUG_OBSESSIVE, 'No HTML tree for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # Abort if the part number isn't in the HTML somewhere.
    # (Only use the numbers and letters to compare PN to HTML.)
    if re.sub(r'[\W_]', '', str.lower(pn)) not in re.sub(r'[\W_]', '', str.lower(str(html))):
        logger.log(DEBUG_OBSESSIVE, 'No part number {} in HTML page from {}'.format(pn, dist))
        raise PartHtmlError

    # If the tree contains the tag for a product page, then just return it.
    if tree.find('div', class_='productDisplay', id='page') is not None:
        return tree, url

    # If the tree is for a list of products, then examine the links to try to
    # find the part number.
    product_table = tree.find('table', class_='productLister', id='sProdList')
    if product_table is not None:
        logger.log(DEBUG_OBSESSIVE, 'Found product table for {} from {}'.format(pn, dist))
        if descend <= 0:
            logger.log(DEBUG_OBSESSIVE, 'Passed descent limit for {} from {}'.format(pn, dist))
            raise PartHtmlError

        # Look for the table of products.
        try:
            products = product_table.find('tbody').find_all('tr')
        except AttributeError:
            # The table has no body, so there is nothing to follow.
            logger.log(DEBUG_OBSESSIVE, 'No product rows found for {} from {}'.format(pn, dist))
            raise PartHtmlError

        # Extract the product links for the part numbers from the table,
        # skipping rows without a manufacturer-part cell or without a link.
        product_links = []
        for p in products:
            try:
                link = p.find('td', class_='mftrPart').find('a')
            except AttributeError:
                continue
            if link is not None:
                product_links.append(link)

        # Extract all the part numbers from the text portion of the links.
        part_numbers = [l.text for l in product_links]

        # Look for the part number in the list that most closely matches the
        # requested part number.
        try:
            match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]
        except IndexError:
            raise PartHtmlError

        # Now look for the link that goes with the closest matching part number.
        for l in product_links:
            if l.text == match:
                # Get the tree for the linked-to page and return that.
                logger.log(DEBUG_OBSESSIVE, 'Selecting {} from product table for {} from {}'.format(l.text, pn, dist))
                return get_part_html_tree(dist, pn, extra_search_terms,
                                          url=l['href'], descend=descend-1)

    # I don't know what happened here, so give up.
    logger.log(DEBUG_OBSESSIVE, 'Unknown error for {} from {}'.format(pn, dist))
    raise PartHtmlError
def get_price_tiers(html_tree):
    '''Get the pricing tiers from the parsed tree of the RS Components product page.

    Prices are parsed with ',' as the decimal separator and converted from
    EUR to USD with the module-level currency converter.

    Args:
        html_tree: Parsed product page. Only find_all() and the .text of
            the returned elements are used.

    Returns:
        A dict mapping each price-break quantity (int) to its unit price in
        USD (float). Empty when no pricing info is found.
    '''
    price_tiers = {}
    try:
        qty_strs = [qty.text for qty in html_tree.find_all(
            'div', class_='breakRangeWithoutUnit', itemprop='eligibleQuantity')]
        # Compare by value: an identity test against a literal is unreliable.
        price_strs = [price.text for price in
                      html_tree.find_all('div', class_='unitPrice')
                      if price.text != '']
    except AttributeError:
        # This happens when no pricing info is found in the tree.
        return price_tiers  # Return empty price tiers.

    for qty_str, price_str in zip(qty_strs, price_strs):
        try:
            qty = re.search(r'(\s*)([0-9,]+)', qty_str).group(2)
            qty = int(re.sub(r'[^0-9]', '', qty))
            price_str = price_str.replace(',', '.')
            price_eur = float(re.sub(r'[^0-9\.]', '', price_str))
            # Store the tier only after conversion succeeds, so an
            # unconverted EUR amount is never reported as USD.
            price_tiers[qty] = currency.convert(price_eur, 'EUR', 'USD')
        except (TypeError, AttributeError, ValueError):
            continue
    return price_tiers


def get_part_num(html_tree):
    '''Get the part number from the RS Components product page.

    Returns:
        The digits and dashes of the 'sku' span's text, or '' when that
        span is not in the page.
    '''
    try:
        pn_str = html_tree.find('span', class_='keyValue bold', itemprop='sku').text
        return re.sub(r'[^0-9\-]', '', pn_str)
    except (KeyError, AttributeError):
        return ''  # No catalog number found in page.


def get_qty_avail(html_tree):
    '''Get the available quantity of the part from the RS Components product page.

    Returns:
        The quantity as an int, or None when the page shows no quantity.
    '''
    try:
        qty_str = html_tree.find('div', class_='floatLeft stockMessaging availMessageDiv bottom5').text
    except (AttributeError, ValueError):
        # No quantity found (not even 0) so this is probably a non-stocked part.
        # Return None so the part won't show in the spreadsheet for this dist.
        return None
    try:
        # Only the first ten characters are searched for digits.
        qty = re.sub(r'[^0-9]', '', qty_str[0:10])  # Strip all non-number chars.
        return int(qty)  # Return integer for quantity.
    except ValueError:
        # No quantity found (not even 0) so this is probably a non-stocked part.
        # Return None so the part won't show in the spreadsheet for this dist.
        return None


def get_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None):
    '''Find the RS Components HTML page for a part number and return the parse tree and URL.

    Args:
        dist: Distributor name; only used in log messages.
        pn: Part number to look up.
        extra_search_terms: Extra text appended to the search keywords.
        url: Page to start from. When None, the RS search page for pn is
            used; site-relative URLs ('/...' or '..') are made absolute.
        descend: How many more search-result pages may be followed.
        local_part_html: Not used by this function.

    Returns:
        A (tree, url) tuple: the parsed product page and its URL.

    Raises:
        PartHtmlError: If the page can't be fetched or parsed, doesn't
            mention pn, or no product page is reached.
    '''
    # Use the part number to lookup the part using the site search function,
    # unless a starting url was given.
    if url is None:
        url = 'http://it.rs-online.com/web/c/?searchTerm=' + urlquote(pn + ' ' + extra_search_terms, safe='')
    elif url.startswith('/'):
        url = 'http://it.rs-online.com' + url
    elif url.startswith('..'):
        url = 'http://it.rs-online.com/Search/' + url

    # Open the URL, read the HTML from it, and parse it into a tree structure.
    for _ in range(HTML_RESPONSE_RETRIES):
        try:
            req = FakeBrowser(url)
            response = urlopen(req)
            html = response.read()
            break
        except WEB_SCRAPE_EXCEPTIONS:
            logger.log(DEBUG_DETAILED, 'Exception while web-scraping {} from {}'.format(pn, dist))
    else:  # Couldn't get a good read from the website.
        logger.log(DEBUG_OBSESSIVE, 'No HTML page for {} from {}'.format(pn, dist))
        raise PartHtmlError

    try:
        tree = BeautifulSoup(html, 'lxml')
    except Exception:
        logger.log(DEBUG_OBSESSIVE, 'No HTML tree for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # Abort if the part number isn't in the HTML somewhere.
    # (Only use the numbers and letters to compare PN to HTML.)
    if re.sub(r'[\W_]', '', str.lower(pn)) not in re.sub(r'[\W_]', '', str.lower(str(html))):
        logger.log(DEBUG_OBSESSIVE, 'No part number {} in HTML page from {}'.format(pn, dist))
        raise PartHtmlError

    # If the tree contains the tag for a product page, then just return it.
    if tree.find('div', class_='specTableContainer') is not None:
        return tree, url

    # If the tree is for a list of products, then follow a link from it.
    if tree.find('div', class_='srtnPageContainer') is not None:
        logger.log(DEBUG_OBSESSIVE, 'Found product table for {} from {}'.format(pn, dist))
        if descend <= 0:
            logger.log(DEBUG_OBSESSIVE, 'Passed descent limit for {} from {}'.format(pn, dist))
            raise PartHtmlError

        # Up to now get the first url found in the list, i.e. do not choose
        # the url based on the stock type (e.g. single unit, reel etc.).
        for p in tree.find_all('tr', class_='resultRow'):
            try:
                link = p.find('a', class_='primarySearchLink')['href']
            except (AttributeError, TypeError, KeyError):
                # This row has no usable product link; try the next one.
                continue
            # The recursive call is outside the try so that its errors are
            # not mistaken for a missing link.
            return get_part_html_tree(dist, pn, extra_search_terms,
                                      url=link, descend=descend-1)

    # I don't know what happened here, so give up.
    logger.log(DEBUG_OBSESSIVE, 'Unknown error for {} from {}'.format(pn, dist))
    raise PartHtmlError


if __name__ == '__main__':
    # Manual check: scrape one part and print what was extracted.
    html_tree = get_part_html_tree(dist='rs', pn='MCP1252-33X50I/MS')

    pt = get_price_tiers(html_tree[0])
    qt = get_qty_avail(html_tree[0])
    pn = get_part_num(html_tree[0])
    print('****************')
    print(pt)
    print('****************')
    print(qt)
    print('****************')
    print(pn)
    print('****************')
+ 'wrk_hdr_format': { + 'font_size': 14, + 'font_color': 'white', + 'bold': True, + 'align': 'center', + 'valign': 'vcenter', + 'bg_color': '#0C4DA1' # TME blue + } + } + } +) diff --git a/kicost/distributors/tme/tme.py b/kicost/distributors/tme/tme.py new file mode 100644 index 000000000..3e31e8c57 --- /dev/null +++ b/kicost/distributors/tme/tme.py @@ -0,0 +1,230 @@ +# MIT license +# +# Copyright (C) 2015 by XESS Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +# Inserted by Pasteurize tool. 
def __ajax_details(pn, dist='tme'):
    '''Load part details from TME using XMLHttpRequest.

    Args:
        pn: TME part symbol to query.
        dist: Distributor name; only used in log messages.

    Returns:
        A (html_tree, quantity) tuple: the parsed 'PriceTpl' fragment and
        the 'InStock' value of the first product in the reply. Returns
        (None, None) when the request fails or the reply can't be used.
    '''
    data = urlencode({
        'symbol': pn,
        'currency': 'USD'
    }).encode("utf-8")
    req = FakeBrowser('http://www.tme.eu/en/_ajax/ProductInformationPage/_getStocks.html')
    req.add_header('X-Requested-With', 'XMLHttpRequest')
    for _ in range(HTML_RESPONSE_RETRIES):
        try:
            response = urlopen(req, data)
            r = response.read()
            break
        except WEB_SCRAPE_EXCEPTIONS:
            logger.log(DEBUG_DETAILED, 'Exception while web-scraping {} from {}'.format(pn, dist))
    else:  # Couldn't get a good read from the website.
        logger.log(DEBUG_OBSESSIVE, 'No AJAX data for {} from {}'.format(pn, dist))
        return None, None

    try:
        r = r.decode('utf-8')  # Convert bytes to string in Python 3.
        p = json.loads(r)['Products'][0]
        html_tree = BeautifulSoup(p['PriceTpl'].replace("\n", ""), "lxml")
        quantity = p['InStock']
        return html_tree, quantity
    except (ValueError, KeyError, IndexError, TypeError):
        # TypeError covers a reply whose JSON doesn't have the expected shape.
        logger.log(DEBUG_OBSESSIVE, 'Could not obtain AJAX data from TME!')
        return None, None


def get_price_tiers(html_tree):
    '''Get the pricing tiers from the parsed tree of the TME product page.

    The page only supplies the part symbol; the prices themselves come
    from a separate AJAX request.

    Returns:
        A dict mapping each price-break quantity (int) to its unit price
        (float). Empty when no pricing info can be obtained.
    '''
    price_tiers = {}
    try:
        pn = get_part_num(html_tree)
        if pn == '':
            return price_tiers

        ajax_tree, quantity = __ajax_details(pn)
        if ajax_tree is None:
            return price_tiers

        qty_strs = []
        price_strs = []
        for tr in ajax_tree.find('tbody', id='prices_body').find_all('tr'):
            td = tr.find_all('td')
            # Only three-cell rows are used: first cell is the quantity,
            # last cell is the price.
            if len(td) == 3:
                qty_strs.append(td[0].text)
                price_strs.append(td[2].text)

        for qty_str, price_str in zip(qty_strs, price_strs):
            try:
                qty = re.search(r'(\s*)([0-9,]+)', qty_str).group(2)
                qty = int(re.sub(r'[^0-9]', '', qty))
                price_tiers[qty] = float(re.sub(r'[^0-9\.]', '', price_str))
            except (TypeError, AttributeError, ValueError, IndexError):
                continue
    except AttributeError:
        # This happens when no pricing info is found in the tree.
        logger.log(DEBUG_OBSESSIVE, 'No TME pricing information found!')
        return price_tiers  # Return empty price tiers.
    return price_tiers


def get_part_num(html_tree):
    '''Get the part number from the TME product page.

    Returns:
        The text of the 'pip-product-symbol' cell, or '' when that cell is
        not in the page.
    '''
    try:
        return html_tree.find('td', class_="pip-product-symbol").text
    except AttributeError:
        logger.log(DEBUG_OBSESSIVE, 'No TME part number found!')
        return ''


def get_qty_avail(html_tree):
    '''Get the available quantity of the part from the TME product page.

    Returns:
        The quantity as an int, or None when it can't be determined.
    '''
    pn = get_part_num(html_tree)
    if pn == '':
        logger.log(DEBUG_OBSESSIVE, 'No TME part quantity found!')
        return None

    ajax_tree, qty_str = __ajax_details(pn)
    if qty_str is None:
        return None

    try:
        return int(qty_str)
    except (ValueError, TypeError):
        # No quantity found (not even 0) so this is probably a non-stocked part.
        # Return None so the part won't show in the spreadsheet for this dist.
        logger.log(DEBUG_OBSESSIVE, 'No TME part quantity found!')
        return None


def get_part_html_tree(dist, pn, extra_search_terms='', url=None, descend=2, local_part_html=None):
    '''Find the TME HTML page for a part number and return the parse tree and URL.

    Args:
        dist: Distributor name; only used in log messages.
        pn: Part number to look up.
        extra_search_terms: Extra text appended to the search keywords.
        url: Page to start from. When None, the TME search page for pn is
            used; a site-relative URL ('/...') is made absolute.
        descend: How many more search-result pages may be followed.
        local_part_html: Not used by this function.

    Returns:
        A (tree, url) tuple: the parsed product page and its URL.

    Raises:
        PartHtmlError: If the page can't be fetched or parsed, doesn't
            mention pn, or no product page is reached.
    '''
    # Use the part number to lookup the part using the site search function,
    # unless a starting url was given.
    if url is None:
        url = 'http://www.tme.eu/en/katalog/?search=' + urlquote(
            pn + ' ' + extra_search_terms,
            safe='')
    elif url.startswith('/'):
        url = 'http://www.tme.eu' + url

    # Open the URL, read the HTML from it, and parse it into a tree structure.
    req = FakeBrowser(url)
    for _ in range(HTML_RESPONSE_RETRIES):
        try:
            response = urlopen(req)
            html = response.read()
            break
        except WEB_SCRAPE_EXCEPTIONS:
            logger.log(DEBUG_DETAILED, 'Exception while web-scraping {} from {}'.format(pn, dist))
    else:  # Couldn't get a good read from the website.
        logger.log(DEBUG_OBSESSIVE, 'No HTML page for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # Abort if the part number isn't in the HTML somewhere.
    # (Only use the numbers and letters to compare PN to HTML.)
    if re.sub(r'[\W_]', '', str.lower(pn)) not in re.sub(r'[\W_]', '', str.lower(str(html))):
        logger.log(DEBUG_OBSESSIVE, 'No part number {} in HTML page from {} ({})'.format(pn, dist, url))
        raise PartHtmlError

    try:
        tree = BeautifulSoup(html, 'lxml')
    except Exception:
        logger.log(DEBUG_OBSESSIVE, 'No HTML tree for {} from {}'.format(pn, dist))
        raise PartHtmlError

    # If the tree contains the tag for a product page, then just return it.
    if tree.find('div', id='ph') is not None:
        return tree, url

    # If the tree is for a list of products, then examine the links to try to
    # find the part number.
    product_table = tree.find('table', id="products")
    if product_table is not None:
        logger.log(DEBUG_OBSESSIVE, 'Found product table for {} from {}'.format(pn, dist))
        if descend <= 0:
            logger.log(DEBUG_OBSESSIVE, 'Passed descent limit for {} from {}'.format(pn, dist))
            raise PartHtmlError

        # Look for the table of products.
        products = product_table.find_all('tr', class_=('product-row'))

        # Extract the product links for the part numbers from the table,
        # skipping rows that have no manufacturer cell.
        product_links = []
        for p in products:
            try:
                product_links.extend(
                    p.find('div', class_='manufacturer').find_all('a'))
            except AttributeError:
                continue

        # Extract all the part numbers from the text portion of the links.
        part_numbers = [l.text for l in product_links]

        # Look for the part number in the list that most closely matches the
        # requested part number. The list is empty when no row had a link.
        try:
            match = difflib.get_close_matches(pn, part_numbers, 1, 0.0)[0]
        except IndexError:
            logger.log(DEBUG_OBSESSIVE, 'No product links found for {} from {}'.format(pn, dist))
            raise PartHtmlError

        # Now look for the link that goes with the closest matching part number.
        for l in product_links:
            if (not l['href'].startswith('./katalog')) and l.text == match:
                # Get the tree for the linked-to page and return that.
                logger.log(DEBUG_OBSESSIVE, 'Selecting {} from product table for {} from {}'.format(l.text, pn, dist))
                # TODO: The current implementation does up to four HTTP
                # requests per part (search, part details page for TME P/N,
                # XHR for pricing information, and XHR for stock
                # availability). This is mainly for the compatibility with
                # other distributor implementations (html_tree gets passed
                # to all functions).
                # A modified implementation (which would pass JSON data
                # obtained by the XHR instead of the HTML DOM tree) might be
                # able to do the same with just two requests (search for TME
                # P/N, XHR for pricing and stock availability).
                return get_part_html_tree(dist, pn, extra_search_terms,
                                          url=l['href'], descend=descend-1)

    # I don't know what happened here, so give up.
    logger.log(DEBUG_OBSESSIVE, 'Unknown error for {} from {}'.format(pn, dist))
    raise PartHtmlError
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +# Author information. +__author__ = 'Hildo Guillardi Junior' +__webpage__ = 'https://github.com/hildogjr/' +__company__ = 'University of Campinas - Brazil' + +import os + + +# Reference string order to the spreadsheet. Use this to +# group the elements in sequencial rows. +BOM_ORDER = 'u,q,d,t,y,x,c,r,s,j,p,cnn,con' + + +# The distributor module directories will be found in this directory. +directory = os.path.dirname(__file__) + +# Search for the distributor modules and import them. +for module in os.listdir(os.path.dirname(__file__)): + + # Avoid importing non-directories. + abs_module = os.path.join(directory, module) + if not os.path.isdir(abs_module): + continue + + # Avoid directories like __pycache__. + if module.startswith('__'): + continue + + # Import the module. + __import__(module, globals(), locals(), [], level=1) + +from .subparts import * # Subparts and sub quantities rotines. 
diff --git a/kicost/eda_tools/altium/__init__.py b/kicost/eda_tools/altium/__init__.py new file mode 100644 index 000000000..bcf49d5a8 --- /dev/null +++ b/kicost/eda_tools/altium/__init__.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +# MIT license +# +# Copyright (C) 2015 by XESS Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +# Author information. +__author__ = 'Hildo Guillardi Junior' +__webpage__ = 'https://github.com/hildogjr/' +__company__ = 'University of Campinas - Brazil' + +from .altium import get_part_groups_altium diff --git a/kicost/eda_tools/altium/altium.py b/kicost/eda_tools/altium/altium.py new file mode 100644 index 000000000..9c6a3c778 --- /dev/null +++ b/kicost/eda_tools/altium/altium.py @@ -0,0 +1,143 @@ +# Inserted by Pasteurize tool. 
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from builtins import zip
from builtins import range
from builtins import int
from builtins import str
from future import standard_library
standard_library.install_aliases()

import future

from bs4 import BeautifulSoup
import logging

logger = logging.getLogger('kicost')

DEBUG_OVERVIEW = logging.DEBUG
DEBUG_DETAILED = logging.DEBUG-1
DEBUG_OBSESSIVE = logging.DEBUG-2

import sys

SEPRTR = ':' # Delimiter between library:component, distributor:field, etc.

# Temporary class for storing part group information.
class IdenticalComponents(object):
    pass

def get_part_groups_altium(in_file, ignore_fields, variant):
    '''Get groups of identical parts from an Altium XML BOM.

    Every ``row`` element found under ``rows`` describes one or more
    designators that share the same footprint, library part, value, comment
    and manufacturer part number.

    Args:
        in_file: XML text or open file handle, handed to BeautifulSoup.
        ignore_fields: Accepted for compatibility with the other importers;
            not used by this function.
        variant: Accepted for compatibility with the other importers; not
            used by this function.

    Returns:
        A ``(groups, prj_info)`` tuple. ``groups`` is a list of
        IdenticalComponents objects with ``fields``, ``refs`` and
        ``manf_nums`` attributes; ``prj_info`` is a placeholder dict.
    '''

    # KiCost field name -> attribute holding it in an Altium XML row.
    field_attrs = (
        ('footprint', 'footprint1'),
        ('libpart', 'libref1'),
        ('value', 'value3'),
        ('reference', 'comment1'),
        ('manf#', 'manufacturer_part_number_11'),
    )
    is_python2 = sys.version_info[0] == 2

    def extract_fields(part, variant):
        '''Extract XML fields from the part in a library or schematic.'''
        fields = {}
        for name, attr in field_attrs:
            value = part[attr]
            if is_python2:
                value = value.encode('ascii', 'ignore')
            fields[name] = value
        return fields

    # Read-in the schematic XML file to get a tree and get its root.
    logger.log(DEBUG_OVERVIEW, 'Get schematic XML...')
    root = BeautifulSoup(in_file, 'lxml')

    logger.log(DEBUG_OVERVIEW, 'Get parts library...')
    component_groups = {}
    ref_manf_nums = {}  # manf# of every single designator, used when splitting groups.

    for row in root.find('rows').find_all('row'):

        # Get the values for the fields of this BOM row.
        fields = extract_fields(row, variant)
        refs = row['designator1'].replace(' ','').split(',')
        manf_num = fields.get('manf#')
        for ref in refs:
            ref_manf_nums[ref] = manf_num

        # Rows are identical if all their fields match, ignoring the
        # manufacturer's part number and distributor-specific fields.
        hash_fields = {k: fields[k] for k in fields if k not in ('manf#','manf') and SEPRTR not in k}
        key = tuple(sorted(hash_fields.items()))

        grp = component_groups.get(key)
        if grp is None:
            grp = IdenticalComponents()
            grp.fields = fields
            grp.refs = refs
            grp.manf_nums = set([manf_num])
            component_groups[key] = grp
        else:
            # Merge into the existing group rather than replacing it, so the
            # designators of the earlier row are not lost.
            grp.refs.extend(refs)
            grp.manf_nums.add(manf_num)

    # Now we have groups of seemingly identical parts. But some of the parts
    # within a group may have different manufacturer's part numbers, and these
    # groups may need to be split into smaller groups of parts all having the
    # same manufacturer's number. Here are the cases that need to be handled:
    #   One manf# number: All parts have the same manf#. Don't split this group.
    #   Two manf# numbers, but one is None: Some of the parts have no manf# but
    #       are otherwise identical to the other parts in the group. Don't split
    #       this group. Instead, propagate the non-None manf# to all the parts.
    #   Two manf#, neither is None: All parts have non-None manf# numbers.
    #       Split the group into two smaller groups of parts all having the same
    #       manf#.
    #   Three or more manf#: Split this group into smaller groups, each one with
    #       parts having the same manf#, even if it's None. It's impossible to
    #       determine which manf# the None parts should be assigned to, so leave
    #       their manf# as None.
    new_component_groups = []  # Copy new component groups into this.
    for grp in component_groups.values():
        num_manf_nums = len(grp.manf_nums)
        if num_manf_nums == 1:
            new_component_groups.append(grp)
            continue  # Single manf#. Don't split this group.
        if num_manf_nums == 2 and None in grp.manf_nums:
            # Two manf#, but one of them is None. Don't split this group;
            # give the group the manf# that is known.
            if grp.fields.get('manf#') is None:
                grp.fields['manf#'] = [m for m in grp.manf_nums if m is not None][0]
            new_component_groups.append(grp)
            continue
        # Otherwise, split the group into subgroups, each with the same manf#.
        for manf_num in grp.manf_nums:
            sub_group = IdenticalComponents()
            sub_group.manf_nums = set([manf_num])
            sub_group.refs = [ref for ref in grp.refs
                              if ref_manf_nums.get(ref) == manf_num]
            # Each subgroup needs its own field dict carrying its own manf#.
            sub_group.fields = dict(grp.fields)
            sub_group.fields['manf#'] = manf_num
            new_component_groups.append(sub_group)

    prj_info = {'title':'test_title','company':'test_company'}  # Not implemented yet.

    # Now return the list of identical part groups.
    return new_component_groups, prj_info
+ return list(new_component_groups.values()), prj_info + +if __name__=='__main__': + + file_handle=open('meacs.xml') + #~ file_handle=open('wiSensAFE.xml') + + get_part_groups_altium(file_handle,'','') diff --git a/kicost/eda_tools/subparts.py b/kicost/eda_tools/subparts.py new file mode 100644 index 000000000..3171153d0 --- /dev/null +++ b/kicost/eda_tools/subparts.py @@ -0,0 +1,203 @@ +# MIT license +# +# Copyright (C) 2015 by XESS Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +# Libraries. +import re # Regular expression parser. +#from ..kicost import distributors +from ..kicost import logger, DEBUG_OVERVIEW, DEBUG_DETAILED, DEBUG_OBSESSIVE +distributors = ['rs','digikey','mouser','newark','farnell'] + +# Author information. 
__author__ = 'Hildo Guillardi Junior'
__webpage__ = 'https://github.com/hildogjr/'
__company__ = 'University of Campinas - Brazil'

__all__ = ['subpart_split','subpart_qty']

QTY_SSTR = r'[\:]'     # Regex class separating the sub part quantity from
                       # the manufacturer/distributor code.
PART_SSTR = r'[\;\,]'  # Regex class separating the parts (manufacturer/
                       # distributor codes) in a list.
SUB_SSTR = ''  # '.'   # String put between a reference and its sub part number.

# Text that can be a quantity: an optionally signed integer, decimal
# ('4.5') or fraction ('4/5'). It also matches the empty string.
_QTY_NUMBER = re.compile(r"^\s*[\-\+]?\s*[0-9]*\s*[\.\/]*\s*?[0-9]*\s*$")
# Decimal and fraction marks, ignored when comparing two numeric strings.
_QTY_MARKS = re.compile(r'[\.\/]')

# Definitions to parse the manufacturer / distributor codes to allow
# sub parts and different quantities (even fractions) in them.


# ------------------ Public functions

def subpart_split(components):
    '''Split components that list several part codes into one component per sub part.

    A manufacturer/distributor field may hold a list of codes separated by
    PART_SSTR, each optionally carrying a quantity separated by QTY_SSTR.
    A component whose longest list has N > 1 codes is replaced by N
    components named ``<ref><SUB_SSTR><n>``. Sub part n takes the n-th code
    of every field; a field with fewer than n codes is left out of it, e.g.

        U1:  {'manf#': 'PARTG1;PARTG2;PARTG3', 'mouser#': 'PARTM1;PARTM2'}
    becomes
        U11: {'manf#': 'PARTG1', 'mouser#': 'PARTM1'}
        U12: {'manf#': 'PARTG2', 'mouser#': 'PARTM2'}
        U13: {'manf#': 'PARTG3'}

    Each sub part also gets a ``<field>_subqty`` entry per code and has
    ' - p<n>/<N>' appended to its value.

    Args:
        components: Dict mapping reference designators to field dicts.
            It is modified in place.

    Returns:
        The same ``components`` dict.
    '''
    logger.log(DEBUG_OVERVIEW, 'Search for subpart in the designed parts...')
    code_fields = [d + '#' for d in distributors]
    code_fields.append('manf#')

    # Iterate over a snapshot of the keys: entries are removed and added below.
    for designator in list(components.keys()):
        part = components[designator]

        # Collect the code list of every manufacturer/distributor field used.
        found_fields = []
        subparts_manf = {}
        for field_code in code_fields:
            if field_code in part:
                found_fields.append(field_code)
                subparts_manf[field_code] = subpart_list(part[field_code])
        if not found_fields:
            continue  # No manf/distributor code, pass to the next part.
        logger.log(DEBUG_DETAILED, '%s >> %s', designator, found_fields)

        subparts_qty = max(len(subparts_manf[f]) for f in found_fields)
        if subparts_qty <= 1:
            continue  # A single part needs no splitting.

        # Replace the part by its sub parts.
        part_actual = components.pop(designator)
        # The numbered value keeps the sub parts distinguishable even when
        # the part has no value of its own.
        part_actual_value = part_actual.get('value', '')
        for subparts_index in range(subparts_qty):
            # Start every sub part from a fresh copy so that codes written
            # for one sub part never show up in the next one.
            subpart_actual = part_actual.copy()
            subpart_actual['value'] = '{v} - p{idx}/{total}'.format(
                v=part_actual_value,
                idx=subparts_index + 1,
                total=subparts_qty)
            for field_manf in found_fields:
                codes = subparts_manf[field_manf]
                if subparts_index < len(codes):
                    code_qty, code_part = subpart_qtypart(codes[subparts_index])
                    subpart_actual[field_manf] = code_part
                    subpart_actual[field_manf + '_subqty'] = code_qty
                else:
                    # This field lists fewer codes; drop the unsplit list.
                    subpart_actual.pop(field_manf, None)
            logger.log(DEBUG_OBSESSIVE, '%s', subpart_actual)
            ref = designator + SUB_SSTR + str(subparts_index + 1)
            components[ref] = subpart_actual
    return components


def subpart_qty(component):
    '''Build the spreadsheet quantity formula template of a part group.

    The result holds one ``{}`` placeholder for the board quantity. When the
    group fields carry a 'manf#_subqty' other than '1', the quantity is
    multiplied by it and rounded up with CEILING.

    Args:
        component: Object with ``refs`` (list of references) and ``fields``
            (dict of part fields).

    Returns:
        The formula template string, e.g. '={}*3'.
    '''
    num_refs = len(component.refs)
    plain_qty = '={{}}*{qty}'.format(qty=num_refs)
    try:
        subqty = component.fields.get('manf#_subqty')
    except (KeyError, TypeError):
        return plain_qty
    logger.log(DEBUG_OBSESSIVE, 'Qty>> %s >> %s', component.refs, subqty)
    if subqty is None or subqty == '1':
        return plain_qty
    return '=CEILING({{}}*({subqty})*{qty},1)'.format(
        subqty=subqty,
        qty=num_refs)


# ------------------ Private functions

def subpart_list(part):
    '''Split a manufacturer/distributor field into its list of sub part codes.

    The codes are separated by PART_SSTR; the spaces around the separators
    are removed and any quantity information is kept in each item.
    '''
    # NOTE(review): the original pattern was truncated in the text under
    # review (only the leading "(?" survived). The look-behind below, which
    # keeps a backslash-escaped separator from splitting, is a reconstruction
    # -- confirm against the repository before merging.
    return re.split(r'(?<!\\)\s*' + PART_SSTR + r'\s*', part.strip())


def subpart_qtypart(subpart):
    '''Split one sub part code into its quantity and its part code.

    The quantity may come before or after the code, separated by QTY_SSTR:

        ' 4.5 : ADUM3150BRSZ-RL7' -> ('4.5', 'ADUM3150BRSZ-RL7')
        '4/5 : ADUM3150BRSZ-RL7'  -> ('4/5', 'ADUM3150BRSZ-RL7')
        '7:ADUM3150BRSZ-RL7'      -> ('7', 'ADUM3150BRSZ-RL7')
        'ADUM3150BRSZ-RL7 : 7'    -> ('7', 'ADUM3150BRSZ-RL7')
        'ADUM3150BRSZ-RL7'        -> ('1', 'ADUM3150BRSZ-RL7')
        'ADUM3150BRSZ-RL7:'       -> ('1', 'ADUM3150BRSZ-RL7')

    Returns:
        A ``(qty, part)`` tuple of strings; ``qty`` is '1' when none is given.
    '''
    strings = re.split(r'\s*' + QTY_SSTR + r'\s*', subpart)
    if len(strings) == 2:
        first = strings[0].strip()
        second = strings[1].strip()
        first_is_qty = _QTY_NUMBER.match(first) is not None
        second_is_qty = _QTY_NUMBER.match(second) is not None
        if first_is_qty and second_is_qty:
            # The part code itself may be purely numeric. In that case the
            # quantity is the shorter string, not counting "." and "/".
            if len(_QTY_MARKS.sub('', first)) < len(_QTY_MARKS.sub('', second)):
                qty, part = first, second
            else:
                qty, part = second, first
        elif first_is_qty:
            qty, part = first, second
        elif second_is_qty:
            qty, part = second, first
        else:
            qty, part = '1', first + second
        if qty == '':
            qty = '1'  # Separator present but no quantity given.
    else:
        qty = '1'
        part = ''.join(strings)
    logger.log(DEBUG_OBSESSIVE, 'part/qty>> %s  part>> %s  qty>> %s', subpart, part, qty)
    return qty, part
-def FakeBrowser(url): - req = Request(url) - req.add_header('Accept-Language', 'en-US') - req.add_header('User-agent', get_user_agent()) - return req - class PartHtmlError(Exception): '''Exception for failed retrieval of an HTML parse tree for a part.''' pass -try: - from urllib.parse import urlencode, quote as urlquote, urlsplit, urlunsplit - import urllib.request - from urllib.request import urlopen, Request -except ImportError: - from urlparse import quote as urlquote, urlsplit, urlunsplit - from urllib import urlencode - from urllib2 import urlopen, Request - # ghost library allows scraping pages that have Javascript challenge pages that # screen-out robots. Digi-Key stopped doing this, so it's not needed at the moment. # Also requires installation of Qt4.8 (not 5!) and pyside. @@ -84,36 +73,20 @@ class PartHtmlError(Exception): __all__ = ['kicost'] # Only export this routine for use by the outside world. -# Used to get the names of functions in this module so they can be called dynamically. -THIS_MODULE = locals() - -ALL_MODULES = globals() - SEPRTR = ':' # Delimiter between library:component, distributor:field, etc. -HTML_RESPONSE_RETRIES = 2 # Num of retries for getting part data web page. - -WEB_SCRAPE_EXCEPTIONS = (urllib.request.URLError, http.client.HTTPException) -# Global array of distributor names. -distributors = {} - logger = logging.getLogger('kicost') - - DEBUG_OVERVIEW = logging.DEBUG DEBUG_DETAILED = logging.DEBUG-1 DEBUG_OBSESSIVE = logging.DEBUG-2 +# Import other EDA importer routines. # Altium requires a different part grouping function than KiCad. -from .altium.altium import get_part_groups_altium +from .eda_tools import * -# Import web scraping functions for various distributor websites. -from .local import * -from .digikey import * -from .newark import * -from .mouser import * -from .rs import * -from .farnell import * +# Import information about various distributors. +from . 
import distributors as distributor_imports +distributors = distributor_imports.distributors # Generate a dictionary to translate all the different ways people might want # to refer to part numbers, vendor numbers, and such. @@ -177,6 +150,7 @@ def kicost(in_file, out_filename, user_fields, ignore_fields, variant, num_proce include_dist_list = list(distributors.keys()) rmv_dist = set(exclude_dist_list) rmv_dist |= set(list(distributors.keys())) - set(include_dist_list) + rmv_dist -= set(['local_template']) # We need this later for creating non-web distributors. for dist in rmv_dist: distributors.pop(dist, None) @@ -414,7 +388,7 @@ def extract_fields(part, variant): #print('Removed parts:', set(components.keys())-set(accepted_components.keys())) # Replace the component list with the list of accepted parts. - components = accepted_components + components = subpart_split(accepted_components) # Now partition the parts into groups of like components. # First, get groups of identical components but ignore any manufacturer's @@ -501,6 +475,51 @@ def extract_fields(part, variant): grp_fields[key] = val grp.fields = grp_fields + # Put the components groups in the spreadsheet rows in a spefic order + # using the reference string of the components. The order is defined + # by BOM_ORDER. + ref_identifiers = re.split('(?0: + # If found more than one group with the reference, use the 'manf#' + # as second order criterian. + if len(component_groups_ref_match)>1: + try: + for item in component_groups_ref_match: + component_groups_order_old.remove(item) + except ValueError: + pass + # Examine 'manf#' to get the order. 
+ group_manf_list = [new_component_groups[h].fields.get('manf#') for h in component_groups_ref_match] + if group_manf_list: + m=group_manf_list + sorted_groups = sorted(range(len(group_manf_list)), key=lambda k:(group_manf_list[k] is None, group_manf_list[k])) +# [i[0] for i in sorted(enumerate(group_manf_list), key=lambda x:x[1])] + if logger.isEnabledFor(DEBUG_OBSESSIVE): + print(group_manf_list,' > order: ', sorted_groups) + component_groups_ref_match = [component_groups_ref_match[i] for i in sorted_groups] + component_groups_order_new += component_groups_ref_match + else: + try: + component_groups_order_old.remove(component_groups_ref_match[0]) + except ValueError: + pass + component_groups_order_new += component_groups_ref_match + # The new order is the found refs firt and at the last the not referenced in BOM_ORDER. + component_groups_order_new += component_groups_order_old # Add the missing references groups. + new_component_groups = [new_component_groups[i] for i in component_groups_order_new] + # Now return the list of identical part groups. return new_component_groups, prj_info @@ -526,14 +545,14 @@ def create_local_part_html(parts): dist = key[:key.index(SEPRTR)] except ValueError: continue + + # If the distributor is not in the list of web-scrapable distributors, + # then it's a local distributor. Copy the local distributor template + # and add it to the table of distributors. if dist not in distributors: - distributors[dist] = { - 'scrape': 'local', - 'function': 'local', - 'label': dist, - 'order_cols': ['purch', 'part_num', 'refs'], - 'order_delimiter': '' - } + distributors[dist] = copy.copy(distributors['local_template']) + distributors[dist]['label'] = dist # Set dist name for spreadsheet header. + # Now look for catalog number, price list and webpage link for this part. 
for dist in distributors: cat_num = p.fields.get(dist+':cat#') @@ -562,6 +581,11 @@ def make_random_catalog_number(p): link = urlunsplit(url_parts) with tag('div', klass='link'): text(link) + + # Remove the local distributor template so it won't be processed later on. + # It has served its purpose. + del distributors['local_template'] + html = doc.getvalue() if logger.isEnabledFor(DEBUG_OBSESSIVE): print(indent(html)) @@ -596,64 +620,6 @@ def create_spreadsheet(parts, prj_info, spreadsheet_filename, user_fields, varia 'valign': 'vcenter', 'bg_color': '#303030' }), - 'digikey': workbook.add_format({ - 'font_size': 14, - 'font_color': 'white', - 'bold': True, - 'align': 'center', - 'valign': 'vcenter', - 'bg_color': '#CC0000' # Digi-Key red. - }), - 'mouser': workbook.add_format({ - 'font_size': 14, - 'font_color': 'white', - 'bold': True, - 'align': 'center', - 'valign': 'vcenter', - 'bg_color': '#004A85' # Mouser blue. - }), - 'newark': workbook.add_format({ - 'font_size': 14, - 'font_color': 'white', - 'bold': True, - 'align': 'center', - 'valign': 'vcenter', - 'bg_color': '#A2AE06' # Newark/E14 olive green. - }), - 'rs': workbook.add_format({ - 'font_size': 14, - 'font_color': 'white', - 'bold': True, - 'align': 'center', - 'valign': 'vcenter', - 'bg_color': '#FF0000' # RS Components red. - }), - 'farnell': workbook.add_format({ - 'font_size': 14, - 'font_color': 'white', - 'bold': True, - 'align': 'center', - 'valign': 'vcenter', - 'bg_color': '#FF6600' # Farnell/E14 orange. - }), - 'local_lbl': [ - workbook.add_format({ - 'font_size': 14, - 'font_color': 'black', - 'bold': True, - 'align': 'center', - 'valign': 'vcenter', - 'bg_color': '#909090' # Darker grey. - }), - workbook.add_format({ - 'font_size': 14, - 'font_color': 'black', - 'bold': True, - 'align': 'center', - 'valign': 'vcenter', - 'bg_color': '#c0c0c0' # Lighter grey. 
- }), - ], 'header': workbook.add_format({ 'font_size': 12, 'bold': True, @@ -692,8 +658,10 @@ def create_spreadsheet(parts, prj_info, spreadsheet_filename, user_fields, varia 'num_format': '$#,##0.00', 'valign': 'vcenter' }), - 'founded_perc': workbook.add_format({ + 'found_part_pct': workbook.add_format({ 'font_size': 12, + 'bold': True, + 'italic': True, 'valign': 'vcenter' }), 'proj_info_field': workbook.add_format({ @@ -708,15 +676,19 @@ def create_spreadsheet(parts, prj_info, spreadsheet_filename, user_fields, varia 'valign': 'vcenter' }), 'best_price': workbook.add_format({'bg_color': '#80FF80', }), - 'insufficient_qty': workbook.add_format({'bg_color': '#FF0000', 'font_color':'white'}), + 'not_available': workbook.add_format({'bg_color': '#FF0000', 'font_color':'white'}), + 'order_too_much': workbook.add_format({'bg_color': '#FF0000', 'font_color':'white'}), + 'too_few_available': workbook.add_format({'bg_color': '#FF9900', 'font_color':'black'}), + 'too_few_purchased': workbook.add_format({'bg_color': '#FFFF00'}), 'not_stocked': workbook.add_format({'font_color': '#909090', 'align': 'right' }), - 'not_purchased' : workbook.add_format({'bg_color': '#FFFF00'}), - 'not_founded' : workbook.add_format({'bg_color': '#FF0000'}), - 'not_enough' : workbook.add_format({'bg_color': '#FFFF00'}), 'currency': workbook.add_format({'num_format': '$#,##0.00'}), 'centered_text': workbook.add_format({'align': 'center'}), } + # Add the distinctive header format for each distributor to the dict of formats. + for d in distributors: + wrk_formats[d] = workbook.add_format(distributors[d]['wrk_hdr_format']) + # Create the worksheet that holds the pricing information. wks = workbook.add_worksheet(WORKSHEET_NAME) @@ -793,13 +765,11 @@ def create_spreadsheet(parts, prj_info, spreadsheet_filename, user_fields, varia dist_list = web_dists + local_dists # Load the part information from each distributor into the sheet. 
- index = 0 for dist in dist_list: dist_start_col = next_col - next_col = add_dist_to_worksheet(wks, wrk_formats, index, START_ROW, + next_col = add_dist_to_worksheet(wks, wrk_formats, START_ROW, dist_start_col, UNIT_COST_ROW, TOTAL_COST_ROW, refs_col, qty_col, dist, parts) - index = (index+1) % 2 # Create a defined range for each set of distributor part data. workbook.define_name( '{}_part_data'.format(dist), '={wks_name}!{data_range}'.format( @@ -931,7 +901,7 @@ def add_globals_to_worksheet(wks, wrk_formats, start_row, start_col, 'level': 0, 'label': 'Manf#', 'width': None, - 'comment': 'Manufacturer number for each part.\nRed -> Not founded parts\nYellow -> Not enough aval.', + 'comment': 'Manufacturer number for each part.', 'static': True, }, 'qty': { @@ -939,7 +909,10 @@ def add_globals_to_worksheet(wks, wrk_formats, start_row, start_col, 'level': 0, 'label': 'Qty', 'width': None, - 'comment': 'Total number of each part needed to assemble the board.\nYellow -> Not purchased part enough.', + 'comment': '''Total number of each part needed to assemble the board. +Red -> No parts available. +Orange -> Parts available, but not enough. +Yellow -> Enough parts available, but haven't purchased enough.''', 'static': False, }, 'unit_price': { @@ -947,8 +920,7 @@ def add_globals_to_worksheet(wks, wrk_formats, start_row, start_col, 'level': 0, 'label': 'Unit$', 'width': None, - 'comment': - 'Minimum unit price for each part across all distributors.', + 'comment': 'Minimum unit price for each part across all distributors.', 'static': False, }, 'ext_price': { @@ -956,8 +928,7 @@ def add_globals_to_worksheet(wks, wrk_formats, start_row, start_col, 'level': 0, 'label': 'Ext$', 'width': 15, # Displays up to $9,999,999.99 without "###". 
- 'comment': - 'Minimum extended price for each part across all distributors.', + 'comment': 'Minimum extended price for each part across all distributors.', 'static': False, }, } @@ -980,7 +951,7 @@ def add_globals_to_worksheet(wks, wrk_formats, start_row, start_col, 'level': 0, 'label': user_field, 'width': None, - 'comment': 'User-defined field', + 'comment': 'User-defined field.', 'static': True, } @@ -1027,73 +998,86 @@ def add_globals_to_worksheet(wks, wrk_formats, start_row, start_col, # Enter total part quantity needed. try: + part_qty = subpart_qty(part); wks.write(row, start_col + columns['qty']['col'], - '=BoardQty*{}'.format(len(part.refs))) + part_qty.format('BoardQty') ) + # '=BoardQty*{}'.format(len(part.refs))) except KeyError: pass - - # Enter spreadsheet formula for getting the minimum unit price from all the distributors. + # Gather the cell references for calculating minimum unit price and part availability. dist_unit_prices = [] - dist_purchased_qty = [] - qty_not_enough = [] - part_not_founded = [] + dist_qty_avail = [] + dist_qty_purchased = [] for dist in list(distributors.keys()): + # Get the name of the data range for this distributor. - dist_part_data_range = '{}_part_data'.format(dist) + dist_data_rng = '{}_part_data'.format(dist) + # Get the contents of the unit price cell for this part (row) and distributor (column+offset). dist_unit_prices.append( - 'INDIRECT(ADDRESS(ROW(),COLUMN({})+2))'.format( - dist_part_data_range)) - # Get the purchased quantity cell reference. - dist_purchased_qty.append( - 'IF(ISNUMBER(INDIRECT(ADDRESS(ROW(),COLUMN({dist_part})+2))),INDIRECT(ADDRESS(ROW(),COLUMN({dist_part})+1)),0)'.format(dist_part=dist_part_data_range)) - # Get the contents of the unit price cell for this part (row) and distributor (column+offset). - qty_not_enough.append( - 'INDIRECT(ADDRESS(ROW(),COLUMN({})))'.format( - dist_part_data_range)) - # Get the contents of the unit price cell for this part (row) and distributor (column+offset). 
- part_not_founded.append( - 'NOT(ISNUMBER(INDIRECT(ADDRESS(ROW(),COLUMN({})+2))))'.format( - dist_part_data_range)) - # Create the function that finds the minimum of all the distributor unit price cells for this part. - wks.write(row, start_col + columns['unit_price']['col'], - '=MINA({})'.format(','.join(dist_unit_prices)), - wrk_formats['currency']) - # Create a function that warnning the user if he do not purche the necessary quantity. - wks.conditional_format(row, start_col + columns['qty']['col'], - row, start_col + columns['qty']['col'], { - 'type': 'cell', - 'criteria': '>', - 'value': '=SUM({})'.format(','.join(dist_purchased_qty)), - 'format': wrk_formats['not_purchased'] - }) - # Create a function that error if not found part in any distributor. - # Add first to be prioritary to the next one. - wks.conditional_format(row, start_col + columns['manf#']['col'], - row, start_col + columns['manf#']['col'], { - 'type': 'formula', - 'criteria': '=AND({})'.format(','.join(part_not_founded)), - 'format': wrk_formats['not_founded'] - }) - # Create a function that warnning if not avaliable the necessary quantity. - wks.conditional_format(row, start_col + columns['manf#']['col'], - row, start_col + columns['manf#']['col'], { - 'type': 'formula', - 'criteria': '=SUM({formula})<{qty_needed}'.format( - formula=','.join(qty_not_enough), - qty_needed=xl_rowcol_to_cell(row, start_col + columns['qty']['col'])), - 'format': wrk_formats['not_enough'] - }) + 'INDIRECT(ADDRESS(ROW(),COLUMN({})+2))'.format(dist_data_rng)) + + # Get the contents of the quantity purchased cell for this part and distributor + # unless the unit price is not a number in which case return 0. + dist_qty_purchased.append( + 'IF(ISNUMBER(INDIRECT(ADDRESS(ROW(),COLUMN({0})+2))),INDIRECT(ADDRESS(ROW(),COLUMN({0})+1)),0)'.format(dist_data_rng)) + + # Get the contents of the quantity available cell of this part from this distributor. 
+ dist_qty_avail.append( + 'INDIRECT(ADDRESS(ROW(),COLUMN({})+0))'.format(dist_data_rng)) - # Enter spreadsheet formula for calculating minimum extended price. + # Enter the spreadsheet formula to find this part's minimum unit price across all distributors. + wks.write_formula( + row, start_col + columns['unit_price']['col'], + '=MINA({})'.format(','.join(dist_unit_prices)), + wrk_formats['currency'] + ) + + # Enter the spreadsheet formula for calculating the minimum extended price. wks.write_formula( row, start_col + columns['ext_price']['col'], '=iferror({qty}*{unit_price},"")'.format( - qty=xl_rowcol_to_cell(row, start_col + columns['qty']['col']), - unit_price=xl_rowcol_to_cell(row, start_col + - columns['unit_price']['col'])), - wrk_formats['currency']) + qty = xl_rowcol_to_cell(row, start_col + columns['qty']['col']), + unit_price = xl_rowcol_to_cell(row, start_col + columns['unit_price']['col']) + ), + wrk_formats['currency'] + ) + + # If part is unavailable from all distributors, color quantity cell red. + wks.conditional_format( + row, start_col + columns['qty']['col'], + row, start_col + columns['qty']['col'], + { + 'type': 'formula', + 'criteria': '=IF(SUM({})=0,1,0)'.format(','.join(dist_qty_avail)), + 'format': wrk_formats['not_available'] + } + ) + + # If total available part quantity is less than needed quantity, color cell orange. + wks.conditional_format( + row, start_col + columns['qty']['col'], + row, start_col + columns['qty']['col'], + { + 'type': 'cell', + 'criteria': '>', + 'value': '=SUM({})'.format(','.join(dist_qty_avail)), + 'format': wrk_formats['too_few_available'] + } + ) + + # If total purchased part quantity is less than needed quantity, color cell yellow. 
+ wks.conditional_format( + row, start_col + columns['qty']['col'], + row, start_col + columns['qty']['col'], + { + 'type': 'cell', + 'criteria': '>', + 'value': '=SUM({})'.format(','.join(dist_qty_purchased)), + 'format': wrk_formats['too_few_purchased'], + } + ) # Enter part shortage quantity. try: @@ -1116,7 +1100,7 @@ def add_globals_to_worksheet(wks, wrk_formats, start_row, start_col, return start_col + num_cols, start_col + columns['refs']['col'], start_col + columns['qty']['col'] -def add_dist_to_worksheet(wks, wrk_formats, index, start_row, start_col, +def add_dist_to_worksheet(wks, wrk_formats, start_row, start_col, unit_cost_row, total_cost_row, part_ref_col, part_qty_col, dist, parts): '''Add distributor-specific part data to the spreadsheet.''' @@ -1129,15 +1113,16 @@ def add_dist_to_worksheet(wks, wrk_formats, index, start_row, start_col, 'level': 1, # Outline level (or hierarchy level) for this column. 'label': 'Avail', # Column header label. 'width': None, # Column width (default in this case). - 'comment': 'Available quantity of each part at the distributor.\nRed -> necessary quantity is not available.' - # Column header tool-tip. + 'comment': '''Available quantity of each part at the distributor. +Red -> No quantity available. +Orange -> Too little quantity available.''' }, 'purch': { 'col': 1, 'level': 2, 'label': 'Purch', 'width': None, - 'comment': 'Purchase quantity of each part from this distributor.' + 'comment': 'Purchase quantity of each part from this distributor.\nRed -> Purchasing more than the available quantity.' }, 'unit_price': { 'col': 2, @@ -1152,7 +1137,7 @@ def add_dist_to_worksheet(wks, wrk_formats, index, start_row, start_col, 'label': 'Ext$', 'width': 15, # Displays up to $9,999,999.99 without "###". 'comment': - '(Unit Price) x (Purchase Qty) of each part from this distributor.\nRed -> next price break is cheaper.\nGreen -> cheapest supplier.' 
+ '(Unit Price) x (Purchase Qty) of each part from this distributor.\nRed -> Next price break is cheaper.\nGreen -> Cheapest supplier.' }, 'part_num': { 'col': 4, @@ -1167,12 +1152,8 @@ def add_dist_to_worksheet(wks, wrk_formats, index, start_row, start_col, row = start_row # Start building distributor section at this row. # Add label for this distributor. - try: - wks.merge_range(row, start_col, row, start_col + num_cols - 1, + wks.merge_range(row, start_col, row, start_col + num_cols - 1, distributors[dist]['label'].title(), wrk_formats[dist]) - except KeyError: - wks.merge_range(row, start_col, row, start_col + num_cols - 1, - distributors[dist]['label'].title(), wrk_formats['local_lbl'][index]) row += 1 # Go to next row. # Add column headers, comments, and outline level (for hierarchy). @@ -1252,6 +1233,7 @@ def add_dist_to_worksheet(wks, wrk_formats, index, start_row, start_col, # Sort the tiers based on quantities and turn them into lists of strings. qtys = sorted(price_tiers.keys()) + avail_qty_col = start_col + columns['avail']['col'] purch_qty_col = start_col + columns['purch']['col'] unit_price_col = start_col + columns['unit_price']['col'] ext_price_col = start_col + columns['ext_price']['col'] @@ -1272,20 +1254,45 @@ def add_dist_to_worksheet(wks, wrk_formats, index, start_row, start_col, for q in qtys[1:]: # Skip the first qty which is always 0. price_break_info += '\n{:>6d} {:>7s} {:>10s}'.format( q, - '${:.3f}'.format(price_tiers[q]), + '${:.2f}'.format(price_tiers[q]), '${:.2f}'.format(price_tiers[q] * q)) wks.write_comment(row, unit_price_col, price_break_info) + # Conditional format to show no quantity is available. + wks.conditional_format( + row, start_col + columns['avail']['col'], + row, start_col + columns['avail']['col'], + { + 'type': 'cell', + 'criteria': '==', + 'value': 0, + 'format': wrk_formats['not_available'] + } + ) + # Conditional format to show the avaliable quantity is less than required. 
- wks.conditional_format(row, start_col + columns['avail']['col'], - row, start_col + columns['avail']['col'], { + wks.conditional_format( + row, start_col + columns['avail']['col'], + row, start_col + columns['avail']['col'], + { 'type': 'cell', 'criteria': '<', - 'value': '=iferror(if({purch_qty}="",{needed_qty},{purch_qty}),"")'.format( - needed_qty=xl_rowcol_to_cell(row, part_qty_col), - purch_qty=xl_rowcol_to_cell(row, purch_qty_col)), - 'format': wrk_formats['insufficient_qty'] - }) + 'value': xl_rowcol_to_cell(row, part_qty_col), + 'format': wrk_formats['too_few_available'] + } + ) + + # Conditional format to show the purchase quantity is more than what is available. + wks.conditional_format( + row, start_col + columns['purch']['col'], + row, start_col + columns['purch']['col'], + { + 'type': 'cell', + 'criteria': '>', + 'value': xl_rowcol_to_cell(row, avail_qty_col), + 'format': wrk_formats['order_too_much'] + } + ) # Conditionally format the unit price cell that contains the best price. wks.conditional_format(row, unit_price_col, row, unit_price_col, { @@ -1324,13 +1331,13 @@ def add_dist_to_worksheet(wks, wrk_formats, index, start_row, start_col, PART_INFO_LAST_ROW, total_cost_col)), wrk_formats['total_cost_currency']) - # Show the percentual of founded components. + # Show how many parts were found at this distributor. 
wks.write(unit_cost_row, total_cost_col, - '=(ROWS({count_range})-COUNTBLANK({count_range}))&"/"&ROWS({count_range})&" founded"'.format( + '=(ROWS({count_range})-COUNTBLANK({count_range}))&" of "&ROWS({count_range})&" parts found"'.format( count_range=xl_range(PART_INFO_FIRST_ROW, total_cost_col, PART_INFO_LAST_ROW, total_cost_col)), - wrk_formats['founded_perc']) - wks.write_comment(unit_cost_row, total_cost_col, 'Founded components in this distributor.') + wrk_formats['found_part_pct']) + wks.write_comment(unit_cost_row, total_cost_col, 'Number of parts found at this distributor.') # Add list of part numbers and purchase quantities for ordering from this distributor. ORDER_START_COL = start_col + 1 @@ -1448,15 +1455,19 @@ def enter_order_info(info_col, order_col, numeric=False, delimiter=''): num_to_text_func=num_to_text_func, num_to_text_fmt=num_to_text_fmt))) - # Write the header and how many parts is purchasing. + # Write the header and how many parts are being purchased. + purch_qty_col = start_col + columns['purch']['col'] ORDER_HEADER = PART_INFO_LAST_ROW + 2 - wks.write(ORDER_HEADER, purch_qty_col, - '=IFERROR(IF(OR({count_range}),"Purch cart: "&COUNTIF({count_range},">0")&"/"&ROWS({count_range})&" purchased",""),"")'.format( - count_range=xl_range(PART_INFO_FIRST_ROW, purch_qty_col, - PART_INFO_LAST_ROW, purch_qty_col)), - wrk_formats['founded_perc']) + wks.write_formula( + ORDER_HEADER, purch_qty_col, + '=IFERROR(IF(OR({count_range}),COUNTIF({count_range},">0")&" of "&ROWS({count_range})&" parts purchased",""),"")'.format( + count_range=xl_range(PART_INFO_FIRST_ROW, purch_qty_col, + PART_INFO_LAST_ROW, purch_qty_col) + ), + wrk_formats['found_part_pct'] + ) wks.write_comment(ORDER_HEADER, purch_qty_col, - 'Copy the code bellow to the distributor web site importer.') + 'Copy the information below to the BOM import page of the distributor web site.') # For every column in the order info range, enter the part order information. 
for col_tag in ('purch', 'part_num', 'refs'): @@ -1467,40 +1478,10 @@ def enter_order_info(info_col, order_col, numeric=False, delimiter=''): return start_col + num_cols # Return column following the globals so we know where to start next set of cells. -def get_user_agent(): - # The default user_agent_list comprises chrome, IE, firefox, Mozilla, opera, netscape. - # for more user agent strings,you can find it in http://www.useragentstring.com/pages/useragentstring.php - user_agent_list = [ - "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1", - "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11", - "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6", - "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6", - "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1", - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5", - "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5", - "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3", - "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3", - "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3", - "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3", - "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3", - "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) 
Chrome/19.0.1061.1 Safari/536.3", - "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3", - "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3", - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24", - "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24" - ] - return user_agent_list[randint(0, len(user_agent_list) - 1)] - - -def get_part_html_tree(part, dist, distributor_dict, local_part_html, logger): +def get_part_html_tree(part, dist, get_html_tree_func, local_part_html, logger): '''Get the HTML tree for a part from the given distributor website or local HTML.''' logger.log(DEBUG_OBSESSIVE, '%s %s', dist, str(part.refs)) - - # Get function name for getting the HTML tree for this part from this distributor. - function = distributor_dict[dist]['function'] - get_dist_part_html_tree = THIS_MODULE['get_{}_part_html_tree'.format(function)] for extra_search_terms in set([part.fields.get('manf', ''), '']): try: @@ -1509,7 +1490,7 @@ def get_part_html_tree(part, dist, distributor_dict, local_part_html, logger): # 2) the manufacturer's part number. for key in (dist+'#', dist+SEPRTR+'cat#', 'manf#'): if key in part.fields: - return get_dist_part_html_tree(dist, part.fields[key], extra_search_terms, local_part_html=local_part_html) + return get_html_tree_func(dist, part.fields[key], extra_search_terms, local_part_html=local_part_html) # No distributor or manufacturer number, so give up. else: logger.warning("No '%s#' or 'manf#' field: cannot lookup part %s at %s", dist, part.refs, dist) @@ -1546,19 +1527,18 @@ def scrape_part(args): # Scrape the part data from each distributor website or the local HTML. for d in distributor_dict: - # Get the HTML tree for the part. 
- html_tree, url[d] = get_part_html_tree(part, d, distributor_dict, local_part_html, scrape_logger) + try: + dist_module = getattr(distributor_imports, d) + except AttributeError: + dist_module = getattr(distributor_imports, distributor_dict[d]['module']) - # Get the function names for getting the part data from the HTML tree. - function = distributor_dict[d]['function'] - get_dist_price_tiers = THIS_MODULE['get_{}_price_tiers'.format(function)] - get_dist_part_num = THIS_MODULE['get_{}_part_num'.format(function)] - get_dist_qty_avail = THIS_MODULE['get_{}_qty_avail'.format(function)] + # Get the HTML tree for the part. + html_tree, url[d] = get_part_html_tree(part, d, dist_module.get_part_html_tree, local_part_html, scrape_logger) # Call the functions that extract the data from the HTML tree. - part_num[d] = get_dist_part_num(html_tree) - qty_avail[d] = get_dist_qty_avail(html_tree) - price_tiers[d] = get_dist_price_tiers(html_tree) + part_num[d] = dist_module.get_part_num(html_tree) + qty_avail[d] = dist_module.get_qty_avail(html_tree) + price_tiers[d] = dist_module.get_price_tiers(html_tree) # Return the part data. return id, url, part_num, price_tiers, qty_avail