-
Notifications
You must be signed in to change notification settings - Fork 42
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Importing ecospold1 processes exported from openLCA #126
Comments
hi @sc-gcoste , did you find a way to import agribalyse? I am facing the same issue... thanks! |
Hi @renaud, unfortunately no... |
proposed fix for brightway-lca#126
same problem here |
1 similar comment
same problem here |
same here |
I experienced a similar issue, but not with agribalyse. Would recommend looking through your XML files for any apparent issues from OpenLCA. In my case, some tags were not linked and there were a few empty XML files and tweaking functions ``
@classmethod
def is_valid_ecospold1(cls, dataset):
    """Report whether ``dataset`` exposes the fields the importer reads.

    The dataset is probed the same way ``process_dataset`` reads it: every
    mandatory attribute is dereferenced and every child of ``flowData`` is
    checked for a recognised tag and input/output group. Nothing is
    returned from the probes themselves; they exist only to trigger an
    exception when the structure is incomplete.

    Args:
        dataset: Parsed EcoSpold1 ``dataset`` element (attribute-style
            access to children, ``.get`` for XML attributes).

    Returns:
        ``True`` if all probes succeed, ``False`` otherwise. On failure the
        triggering exception message is printed.
    """
    try:
        process_info = dataset.metaInformation.processInformation
        ref_func = process_info.referenceFunction

        # Mandatory reference-function fields; ``.strip()`` and ``int()``
        # fail on a missing (None) value.
        ref_func.get("name").strip()
        ref_func.get("unit")
        ref_func.get("category")
        ref_func.get("subCategory")
        int(dataset.get("number"))

        # Location lives on <geography>, not <technology>; probing the
        # wrong element let datasets without <geography> pass validation
        # and then fail in ``process_dataset``.
        process_info.geography.get("location")
        process_info.technology.get("text")
        getattr2(
            dataset.metaInformation.modellingAndValidation, "representativeness"
        ).get("productionVolume")
        # timePeriod, samplingProcedure, extrapolations and
        # uncertaintyAdjustments are intentionally not probed here.

        exchange_tags = ("{http://www.EcoInvent.org/EcoSpold01}exchange", "exchange")
        allocation_tags = (
            "{http://www.EcoInvent.org/EcoSpold01}allocation",
            "allocation",
        )

        for exc in dataset.flowData.iterchildren():
            if exc.tag == "comment":
                continue
            if exc.tag in exchange_tags:
                if hasattr(exc, "outputGroup"):
                    if exc.outputGroup.text not in {"0", "1", "2", "3", "4"}:
                        raise ValueError(
                            "Can't understand output group {}".format(exc.outputGroup.text)
                        )
                elif exc.inputGroup.text not in {"1", "2", "3", "4", "5"}:
                    raise ValueError(
                        "Can't understand input group {}".format(exc.inputGroup.text)
                    )
            elif exc.tag in allocation_tags:
                # Conversions are the check: they raise on missing or
                # non-numeric values.
                int(exc.get("referenceToCoProduct"))
                float(exc.get("fraction"))
                for child in exc.iterchildren():
                    if child.tag != "comment":
                        int(child.text)
            else:
                raise ValueError("Flow data type %s not understood" % exc.tag)
        return True
    except Exception as e:
        # Deliberately broad: any structural problem means "not valid".
        print(f"Error message: {e}")
        return False
@classmethod
def process_dataset(cls, dataset, filename, db_name):
    """Convert one EcoSpold1 ``dataset`` element into an activity dict.

    Args:
        dataset: Parsed EcoSpold1 ``dataset`` element.
        filename: Stored unchanged under the ``"filename"`` key.
        db_name: Stored unchanged under the ``"database"`` key.

    Returns:
        A dict with keys ``categories``, ``code``, ``comment``, ``authors``,
        ``database``, ``exchanges``, ``filename``, ``location``, ``name``,
        ``type`` and ``unit``. If any exchange carries a truthy
        ``"reference"`` entry, those exchanges are moved to an additional
        ``"allocations"`` key and ``"exchanges"`` is reduced to the entries
        with a truthy ``"type"``.

    Raises:
        Whatever the mandatory lookups raise (for example ``AttributeError``
        for a missing ``geography`` element, or ``TypeError`` when
        ``number`` is absent). Only comment and author extraction are
        best-effort.
    """
    process_info = dataset.metaInformation.processInformation
    ref_func = process_info.referenceFunction

    def get_comment():
        """Join the free-text metadata into one newline-separated string.

        Returns an empty string if any part of the metadata is missing.
        """
        try:
            representativeness = getattr2(
                dataset.metaInformation.modellingAndValidation, "representativeness"
            )
            comments = [
                ref_func.get("generalComment"),
                ref_func.get("includedProcesses"),
                ("Location: ", process_info.geography.get("text")),
                ("Technology: ", process_info.technology.get("text")),
                ("Time period: ", getattr2(process_info, "timePeriod").get("text")),
                ("Production volume: ", representativeness.get("productionVolume")),
                ("Sampling: ", representativeness.get("samplingProcedure")),
                ("Extrapolations: ", representativeness.get("extrapolations")),
                ("Uncertainty: ", representativeness.get("uncertaintyAdjustments")),
            ]
            # Labelled entries are (label, value) tuples; skip any entry
            # whose value is empty or None.
            return "\n".join(
                " ".join(x) if isinstance(x, tuple) else x
                for x in comments
                if (x[1] if isinstance(x, tuple) else x)
            )
        except Exception:
            # Best-effort: keep the success-path type (str) instead of
            # the list the previous version returned.
            return ""

    def get_authors():
        """Return the ``person`` records referenced by ``dataEntryBy``.

        Returns an empty list if the administrative information is missing.
        """
        try:
            ai = dataset.metaInformation.administrativeInformation
            data_entry = {
                elem.get("person")
                for elem in ai.iterchildren()
                if "dataEntryBy" in elem.tag
            }
            fields = [
                ("address", "address"),
                ("company", "companyCode"),
                ("country", "countryCode"),
                ("email", "email"),
                ("name", "name"),
            ]
            return [
                {label: elem.get(code) for label, code in fields}
                for elem in ai.iterchildren()
                if "person" in elem.tag and elem.get("number") in data_entry
            ]
        except Exception:
            return []

    data = {
        "categories": [ref_func.get("category"), ref_func.get("subCategory")],
        "code": int(dataset.get("number")),
        "comment": get_comment(),
        "authors": get_authors(),
        "database": db_name,
        "exchanges": cls.process_exchanges(dataset),
        "filename": filename,
        "location": process_info.geography.get("location"),
        "name": ref_func.get("name").strip(),
        "type": "process",
        "unit": ref_func.get("unit"),
    }

    try:
        allocation_exchanges = [
            exc for exc in data["exchanges"] if exc.get("reference")
        ]
    except Exception:
        # Best-effort: treat unreadable exchanges as "no allocations".
        allocation_exchanges = []

    if allocation_exchanges:
        data["allocations"] = allocation_exchanges
        data["exchanges"] = [exc for exc in data["exchanges"] if exc.get("type")]
    return data
`` Hope this helps! |
Dear everyone, apologies for not seeing this or responding earlier. We just merged new import xmlschema
import pyecospold
from pathlib import Path
xsd = Path(pyecospold.__file__).parent / "schemas" / "v1" / "EcoSpold01Dataset.xsd"
def get_validation_errors(xml_file: Path, xsd_file: Path) -> None:
    """Print every schema validation error found in ``xml_file``.

    Args:
        xml_file: Path of the XML document to validate.
        xsd_file: Path of the XSD schema to validate against.

    Each error is printed with a 1-based index, its path in the document
    and the reason reported by the schema validator.
    """
    schema = xmlschema.XMLSchema(xsd_file)
    # Read inside a context manager so the file handle is always closed.
    with open(xml_file) as xml_handle:
        xml_source = xml_handle.read()
    for idx, validation_error in enumerate(schema.iter_errors(xml_source), start=1):
        print(f'[{idx}]\n\tpath: {validation_error.path}\n\treason: {validation_error.reason}')
get_validation_errors(
"process_000f29c8-0b4b-32f7-96f7-e0f29530d2fb.xml",
xsd
) Gives the following errors:
My inclination is to not support files which are very invalid - it would mean writing much more complicated code and would also make testing quite difficult. Note that openLCA is not the only one publishing invalid ecospold 1/2 files - even the big boys do it sometimes. However, we can make adjustments to the schema if there is a good reason. You can find the @tngTUDOR @jsvgoncalves FYI and feel free to express your opinion. |
It is not so visible in the user interface, but in the EcoSpold 1 export wizard in openLCA there is a second page when you click When I import the attached example dataset above and export it again with this option, it will generate a default start- and end-date: <timePeriod dataValidForEntirePeriod="true" text="Unspecified">
<startDate>9999-01-01+01:00</startDate>
<endDate>9999-12-31+01:00</endDate>
</timePeriod> and also a default person which is then linked as data generator etc.: <administrativeInformation>
<dataEntryBy person="1"/>
<dataGeneratorAndPublication
person="1"
dataPublishedIn="0"
copyright="true"
accessRestrictedTo="0"/>
<person
number="1"
name="default"
address="Created for EcoSpold 1 compatibility"
telephone="000"
companyCode="default"
countryCode="CH"/>
</administrativeInformation> edit: I think the dataset is then valid against the updated schema of |
Thanks a lot @msrocka! It might make sense to have that field checked by default - I am not sure what the specific business stories are to emit data which doesn't validate against the schema, but probably the default should be a valid file, even if some data is not usable. |
Agreed, would not try to fix very invalid files. But maybe we could try improving the error/exception information to make it a bit more obvious that the file is very invalid. |
I have an ecospold1 dataset extracted from openLCA and I would like to import it into Brightway2. Using the SingleOutputEcospold1Importer should read the ecospold files but apparently something is wrong in the file schema.
Code:
Output:
Process from Agribalyse3 imported to EcoSpold1 with openLCA (to unzip and place in the directory searched by the importer)
process_000f29c8-0b4b-32f7-96f7-e0f29530d2fb.zip
NB: When using `use_mp=True` I get multiple `MultiprocessingError`s inviting me to rerun with `use_mp=False`.
The text was updated successfully, but these errors were encountered: