# Source code for lasio.las

"""The main Lasio class: LASFile."""

from __future__ import print_function

try:  # will work in Python 3
    from collections.abc import Sequence
except ImportError:  # Support Python 2.7
    from collections import Sequence

import csv
import json
import logging
import re
import traceback

# get basestring in py3

try:
    # If this succeeds, the 'unicode' built-in exists: Python 2.
    unicode = unicode
except NameError:
    # 'unicode' is undefined, must be Python 3
    unicode = str
    basestring = (str, bytes)
else:
    # 'unicode' exists, must be Python 2
    bytes = str
    basestring = basestring

# Required third-party packages available on PyPi:

import numpy as np

# internal lasio imports

from . import exceptions

# from .las_items import HeaderItem, CurveItem, SectionItems, OrderedDict
from .las_items import CurveItem
from . import defaults
from . import reader
from . import writer

logger = logging.getLogger(__name__)


class LASFile(object):
    """LAS file object.

    Keyword Arguments:
        file_ref (:term:`file-like object` or :class:`str`): either a
            filename, an open file object, or a string containing the
            contents of a file.
        ignore_header_errors (bool): ignore LASHeaderErrors (False by default)
        ignore_comments (sequence/str): ignore lines beginning with these
            characters e.g. ``("#", '"')`` in header sections.
        ignore_data_comments (str): ignore lines beginning with this
            character in data sections only.
        mnemonic_case (str): 'preserve': keep the case of HeaderItem mnemonics
            'upper': convert all HeaderItem mnemonics to uppercase
            'lower': convert all HeaderItem mnemonics to lowercase
        ignore_data (bool): if True, do not read in any of the actual data,
            just the header metadata. False by default.
        engine (str): "normal": parse data section with normal Python reader
            (quite slow); "numpy": parse data section with
            `numpy.genfromtxt` (fast). By default the engine is "numpy".
        use_normal_engine_for_wrapped (bool): if header metadata indicates
            that the file is wrapped, always use the 'normal' engine. Default
            is True. The only reason you should use False is if speed is a
            very high priority and you had files with metadata that
            incorrectly indicates they are wrapped.
        read_policy (str or list): Apply regular expression substitutions for
            common errors in fixed-width formatted data sections. If you do
            not want any such substitutions to applied, pass
            ``read_policy=()``.
        null_policy (str or list): see
            https://lasio.readthedocs.io/en/latest/data-section.html#handling-invalid-data-indicators-automatically
        accept_regexp_sub_recommendations (bool): Accept recommendations to
            automatically remove read substitutions (applied by the default
            read_policy) which look for numeric run-on errors involving
            hyphens. This avoids incorrect parsing of dates such as
            '2018-05-22' as three separate columns containing '2018', '-5'
            and '-22'. The read substitutions are applied only if the
            inspection code of the data section finds a hyphen in every line.
            The only circumstance where this should be manually set to False
            is where you have very problematic fixed-column-width data
            sections involving negative values.
        index_unit (str): Optionally force-set the index curve's unit to
            "m" or "ft"
        dtypes ("auto", dict or list): specify the data types for each curve
            in the ~ASCII data section. If "auto", each curve will be
            converted to floats if possible and remain as str if not. If a
            dict you can specify only the curve mnemonics you want to convert
            as a key. If a list, please specify data types for each curve in
            order. Note that the conversion currently only occurs via
            numpy.ndarray.astype() and therefore only a few simple casts will
            work e.g. `int`, `float`, `str`.
        encoding (str): character encoding to open file_ref with, using
            :func:`io.open` (this is handled by
            :func:`lasio.reader.open_with_codecs`)
        encoding_errors (str): 'strict', 'replace' (default), 'ignore' - how
            to handle errors with encodings (see `this section
            <https://docs.python.org/3/library/codecs.html#codec-base-classes>`__
            of the standard library's :mod:`codecs` module for more
            information) (this is handled by
            :func:`lasio.reader.open_with_codecs`)
        autodetect_encoding (str or bool): default True to use
            `chardet <https://github.com/chardet/chardet>`__ to detect
            encoding. Note if set to False several common encodings will be
            tried but chardet won't be used.
            (this is handled by :func:`lasio.reader.open_with_codecs`)
        autodetect_encoding_chars (int/None): number of chars to read from
            LAS file for auto-detection of encoding.
            (this is handled by :func:`lasio.reader.open_with_codecs`)

    The documented arguments above are combined from these methods:

        * :func:`lasio.reader.open_with_codecs` - manage issues relate to
          character encodings
        * :meth:`lasio.LASFile.read` - control how NULL values and errors are
          handled during parsing

    Attributes:
        encoding (str or None): the character encoding used when reading the
            file in from disk

    """

    def __init__(self, file_ref=None, **read_kwargs):
        super(LASFile, self).__init__()
        self._text = ""
        # Both are populated properly by read() once the index curve is known.
        self.index_unit = None
        self.index_initial = None
        default_items = defaults.get_default_items()
        # Start from the standard empty section layout. "Other" is free text,
        # not a SectionItems container, hence the str() conversion.
        self.sections = {
            "Version": default_items["Version"],
            "Well": default_items["Well"],
            "Curves": default_items["Curves"],
            "Parameter": default_items["Parameter"],
            "Other": str(default_items["Other"]),
        }

        if not (file_ref is None):
            self.read(file_ref, **read_kwargs)
    def read(
        self,
        file_ref,
        ignore_header_errors=False,
        ignore_comments=("#",),
        ignore_data_comments="#",
        mnemonic_case="upper",
        ignore_data=False,
        engine="numpy",
        use_normal_engine_for_wrapped=True,
        read_policy="default",
        null_policy="strict",
        accept_regexp_sub_recommendations=True,
        index_unit=None,
        dtypes="auto",
        **kwargs,
    ):
        """Read a LAS file.

        Arguments:
            file_ref (:term:`file-like object` or :class:`str`): either a
                filename, an open file object, or a string containing the
                contents of a file.

        Keyword Arguments:
            ignore_header_errors (bool): ignore LASHeaderErrors (False by
                default)
            ignore_comments (sequence/str): ignore header lines beginning
                with these characters e.g. ``("#", '"')``.
            ignore_data_comments (str): ignore data-section lines beginning
                with this character.
            mnemonic_case (str): 'preserve', 'upper' or 'lower' casing for
                HeaderItem mnemonics.
            ignore_data (bool): if True, read only the header metadata.
            engine (str): "normal" (slow pure-Python reader) or "numpy"
                (`numpy.genfromtxt`, fast; the default).
            use_normal_engine_for_wrapped (bool): force the 'normal' engine
                when the header says the file is wrapped. Default True.
            read_policy (str or list): regular expression substitutions for
                common errors in fixed-width data sections; pass ``()`` to
                disable.
            null_policy (str or list): see
                https://lasio.readthedocs.io/en/latest/data-section.html#handling-invalid-data-indicators-automatically
            accept_regexp_sub_recommendations (bool): drop the hyphen run-on
                substitutions when inspection finds a hyphen in every data
                line (avoids mis-parsing dates such as '2018-05-22' as three
                columns).
            index_unit (str): optionally force-set the index curve's unit to
                "m" or "ft".
            dtypes ("auto", dict or list): data types for the curves in the
                ~ASCII data section; conversion is via
                numpy.ndarray.astype().

        Remaining ``**kwargs`` (``encoding``, ``encoding_errors``,
        ``autodetect_encoding``, ``autodetect_encoding_chars``) are passed
        through to :func:`lasio.reader.open_with_codecs`.
        """
        logger.debug("Reading {}...".format(str(file_ref)))

        # Determine which lines to ignore:
        if ignore_comments is None:
            ignore_comments = []
        if isinstance(ignore_comments, str):
            ignore_comments = [ignore_comments]
        logger.debug("Ignore header lines beginning with {}".format(ignore_comments))
        logger.debug("Ignore data lines beginning with {}".format(ignore_data_comments))

        # Attempt to read file
        file_obj = ""
        try:
            file_obj, self.encoding = reader.open_file(file_ref, **kwargs)

            # Guard against LiDAR point-cloud files, which share the ".las"
            # extension and begin with the magic bytes "LASF".
            test_lidar = file_obj.read(4)
            if test_lidar == "LASF":
                err_msg = "This is a LASer file (i.e. LiDAR data), not a Log ASCII Standard file"
                raise IOError(err_msg)
            else:
                file_obj.seek(0)

            logger.debug(
                "Fetching substitutions for read_policy {} and null policy {}".format(
                    read_policy, null_policy
                )
            )

            # Provisional values updated as ~V / ~W header items are parsed;
            # they steer how later sections (especially data) are read.
            provisional_version = 2.0
            provisional_wrapped = "YES"
            provisional_null = None
            provisional_delimiter = "SPACE"

            section_positions = reader.find_sections_in_file(file_obj)
            logger.debug("Found {} sections".format(len(section_positions)))
            if len(section_positions) == 0:
                raise KeyError("No ~ sections found. Is this a LAS file?")

            data_section_indices = []
            # This is a transitional data_section_indicies till the las30 data
            # reading can handle 1.2 and 2.0 data, then it will be merged back
            # into data_section_indices
            las3_data_section_indices = []
            las3_section_indicators = ["_DATA", "_PARAMETER", "_DEFINITION"]

            for i, (k, first_line, last_line, section_title) in enumerate(
                section_positions
            ):
                section_type = reader.determine_section_type(section_title)
                tmpl = "Parsing {typ} section at lines {first_line}-{last_line} ({k} bytes) {title}"
                logger.debug(
                    tmpl.format(
                        typ=section_type,
                        title=section_title,
                        first_line=first_line + 1,
                        last_line=last_line + 1,
                        k=k,
                    )
                )

                # Read traditional LAS header item section
                if section_type == "Header items":
                    file_obj.seek(k)
                    sct_items = reader.parse_header_items_section(
                        file_obj,
                        line_nos=(first_line, last_line),
                        version=provisional_version,
                        ignore_header_errors=ignore_header_errors,
                        mnemonic_case=mnemonic_case,
                        ignore_comments=ignore_comments,
                    )

                    # Update provisional statuses
                    if "VERS" in sct_items:
                        provisional_version = sct_items.VERS.value
                    if "WRAP" in sct_items:
                        provisional_wrapped = sct_items.WRAP.value
                    if "NULL" in sct_items:
                        provisional_null = sct_items.NULL.value
                    if "DLM" in sct_items:
                        provisional_delimiter = sct_items.DLM.value

                    # las3 sections can contain _Data, _Parameter or _Definition
                    las3_section = any(
                        [
                            section_str in section_title[1:].upper()
                            for section_str in las3_section_indicators
                        ]
                    )

                    # TODO: Revise so that Version, Well and
                    # Parameter(Log_Parameter) are handled properly for
                    # LAS-1.2, LAS-2.0 and LAS-3.0

                    # Set "Curves" for LAS-1.2, LAS-2.0, LAS-3.0
                    if (
                        section_title[1] == "C" and "_" not in section_title
                    ) or "~Log_Definition" in section_title:
                        self.sections["Curves"] = sct_items
                    elif (
                        section_title[1] == "P" and "_" not in section_title
                    ) or "~Log_Parameter" in section_title:
                        self.sections["Parameter"] = sct_items
                    # Set any other LAS3.0 sections
                    elif provisional_version == 3.0 and las3_section:
                        self.sections[section_title[1:]] = sct_items
                    # Set regular sections
                    elif section_title[1] == "V":
                        self.sections["Version"] = sct_items
                    elif section_title[1] == "W":
                        self.sections["Well"] = sct_items
                    else:
                        self.sections[section_title[1:]] = sct_items

                # Read free-text LAS header section
                elif section_type == "Header (other)":
                    file_obj.seek(k)
                    line_no = first_line
                    contents = []
                    for line in file_obj:
                        # Stop at the start of the next section.
                        if line.startswith("~") and line_no == last_line:
                            break
                        if line.startswith("~"):
                            continue
                        line_no += 1
                        contents.append(line.strip("\n").strip())
                        if line_no == last_line:
                            break
                    sct_contents = "\n".join(contents)

                    if section_title[1] == "O":
                        self.sections["Other"] = sct_contents
                    else:
                        self.sections[section_title[1:]] = sct_contents

                elif section_type == "Data":
                    logger.debug("Storing reference and returning later...")
                    data_section_indices.append(i)

                # Initial stub for parsing las3 data. This is probably a
                # transitional section that will merge with 1.2/2.0 data
                # parsing once fully functional
                elif section_type == "Las3_Data":
                    logger.debug("Storing Las3_Data reference and returning later...")
                    las3_data_section_indices.append(i)

            line_splitter = reader.define_line_splitter(provisional_delimiter)
            if provisional_delimiter == "COMMA":
                read_policy = "comma-delimiter"

            regexp_subs, value_null_subs, version_NULL = reader.get_substitutions(
                read_policy, null_policy
            )

            if not ignore_data:
                # Override the default "numpy" parser with the 'normal' parser
                # for these conditions:
                # - file is wrapped
                # - null_policy is not "strict"
                # - dtypes is not "auto". Numpy can handle specified dtypes but
                #   the performance decays to the 'normal' performance level.
                if provisional_wrapped == "YES" or null_policy != "strict" or dtypes != "auto":
                    if engine != "normal":
                        logger.warning("Only engine='normal' can read wrapped files")
                        if use_normal_engine_for_wrapped:
                            engine = "normal"

                # Fall back on LAS 3.0 data sections when no 1.2/2.0 ~A
                # section was found.
                if (
                    len(data_section_indices) == 0
                    and len(las3_data_section_indices) > 0
                ):
                    data_section_indices = las3_data_section_indices

                # Check for the number of columns in each data section.
                for k, first_line, last_line, section_title in [
                    section_positions[i] for i in data_section_indices
                ]:
                    logger.debug("Reading data section {}".format(section_title))
                    file_obj.seek(k)
                    n_columns, recommended_regexp_subs = reader.inspect_data_section(
                        file_obj,
                        (first_line, last_line),
                        regexp_subs,
                        ignore_data_comments=ignore_data_comments,
                    )
                    if recommended_regexp_subs != regexp_subs and accept_regexp_sub_recommendations:
                        logger.info(
                            f"The read substitutions {defaults.HYPHEN_SUBS}"
                            "have been removed as this file appears to contain hyphens."
                        )
                        regexp_subs = recommended_regexp_subs
                        # Re-inspect with the trimmed substitution list so
                        # n_columns reflects what will actually be parsed.
                        n_columns, recommended_regexp_subs = reader.inspect_data_section(
                            file_obj,
                            (first_line, last_line),
                            regexp_subs,
                            ignore_data_comments=ignore_data_comments,
                        )

                    # How many curves should the reader attempt to find?
                    reader_n_columns = n_columns
                    if reader_n_columns == -1:
                        reader_n_columns = len(self.curves)

                    file_obj.seek(k)

                    # Convert dtypes passed as dict into list for all columns
                    # defaulting to float for any not specified.
                    if isinstance(dtypes, dict):
                        dtypes = [dtypes.get(c.mnemonic, float) for c in self.curves]

                    # ------------------------------------------------------
                    # Notes
                    # see 2d9e43c3 and e960998f for 'try' background
                    # 2023-03-03: dcs:
                    # With the addition of "Exception" to "except Exception"
                    # the "except KeyboardInterrupt" shouldn't be needed
                    # because "except Exception" won't catch the
                    # KeyboardInterrupt exception
                    # .. verify before removing, by Ctrl-C in the middle of
                    # loading a big las file..
                    # ------------------------------------------------------

                    # Attempt to read the data section; the numpy engine falls
                    # back on the normal engine before giving up entirely.
                    if engine == "numpy":
                        try:
                            curves_data_gen = reader.read_data_section_iterative_numpy_engine(
                                file_obj, (first_line, last_line)
                            )
                        except Exception:
                            try:
                                file_obj.seek(k)
                                curves_data_gen = (
                                    reader.read_data_section_iterative_normal_engine(
                                        file_obj,
                                        (first_line, last_line),
                                        regexp_subs,
                                        value_null_subs,
                                        ignore_data_comments=ignore_data_comments,
                                        n_columns=reader_n_columns,
                                        dtypes=dtypes,
                                        line_splitter=line_splitter,
                                    )
                                )
                            except Exception:
                                raise exceptions.LASDataError(
                                    traceback.format_exc()[:-1]
                                    + " in data section beginning line {}".format(i + 1)
                                )

                    if engine == "normal":
                        try:
                            curves_data_gen = (
                                reader.read_data_section_iterative_normal_engine(
                                    file_obj,
                                    (first_line, last_line),
                                    regexp_subs,
                                    value_null_subs,
                                    ignore_data_comments=ignore_data_comments,
                                    n_columns=reader_n_columns,
                                    dtypes=dtypes,
                                    line_splitter=line_splitter,
                                )
                            )
                        except Exception:
                            raise exceptions.LASDataError(
                                traceback.format_exc()[:-1]
                                + " in data section beginning line {}".format(i + 1)
                            )

                    # Assign data to curves.
                    data_assigned_to_curves = {
                        curve_idx: False for curve_idx in range(len(self.curves))
                    }
                    curve_idx = 0
                    curve_length = 0
                    for curve_arr in curves_data_gen:
                        # Do not replace nulls in the index curve.
                        if version_NULL and curve_arr.dtype == float and curve_idx != 0:
                            logger.debug(
                                "Replacing {} with nan in {}-th curve".format(
                                    provisional_null, curve_idx
                                )
                            )
                            curve_arr[curve_arr == provisional_null] = np.nan
                        logger.debug(
                            "Assigning data {} to curve #{}".format(
                                curve_arr, curve_idx
                            )
                        )
                        if curve_length == 0:
                            curve_length = len(curve_arr)
                        if curve_idx < len(self.curves):
                            self.curves[curve_idx].data = curve_arr
                        else:
                            # More data columns than ~C entries: grow the list.
                            logger.debug("Creating new curve")
                            curve = CurveItem(mnemonic="", data=curve_arr)
                            self.curves.append(curve)
                        data_assigned_to_curves[curve_idx] = True
                        curve_idx += 1

                    # Assign missing data indicators for curves which have no
                    # data in the data section.
                    for curve_idx, flag in data_assigned_to_curves.items():
                        if flag is False:
                            logger.warning(
                                "Curve #{:.0f} '{:s}' is defined in the ~C section "
                                "but there is no data in ~A".format(
                                    curve_idx, self.curves[curve_idx].mnemonic
                                )
                            )
                            self.curves[curve_idx].data = np.empty(curve_length) * np.nan
        finally:
            if hasattr(file_obj, "close"):
                file_obj.close()

        # TODO: reimplement these warnings!!
        # logger.warning("No data section (regexp='~A') found")
        # logger.warning("No numerical data found inside ~A section")

        # Understand the depth/index unit.
        if "m" in str(index_unit):
            index_unit = "m"
        if index_unit:
            self.index_unit = index_unit
        else:
            # No unit forced by the caller: infer it by comparing STRT/STOP/
            # STEP and the index curve's units against known depth units.
            check_units_on = []
            for mnemonic in ("STRT", "STOP", "STEP"):
                if mnemonic in self.well:
                    check_units_on.append(self.well[mnemonic])
            if len(self.curves) > 0:
                check_units_on.append(self.curves[0])
            matches = []
            for index_unit, possibilities in defaults.DEPTH_UNITS.items():
                for check_unit in check_units_on:
                    if any([check_unit.unit == p for p in possibilities]) or any(
                        [check_unit.unit.upper() == p for p in possibilities]
                    ):
                        matches.append(index_unit)
            matches = set(matches)
            if len(matches) == 1:
                self.index_unit = tuple(matches)[0]
            elif len(matches) == 0:
                self.index_unit = None
            else:
                logger.warning("Conflicting index units found: {}".format(matches))
                self.index_unit = None

        # Keep a copy of the as-read index so later edits can be detected.
        if len(self.curves) > 0:
            self.index_initial = self.index.copy()
[docs] def update_start_stop_step(self, STRT=None, STOP=None, STEP=None, fmt="%.5f"): """Configure or change STRT, STOP, and STEP values on the LASFile object. Keyword Arguments: STRT, STOP, STEP (str, int, float): value to set on the relevant header item in the ~Well section - can be any data type. fmt (str): Python format string for formatting the STRT/STOP/STEP value in the situation where any of those keyword arguments are None If STRT/STOP/STEP are not passed to this method, they will be automatically calculated from the index curve. """ # If we are getting STRT and STOP from the data then format them to a # standard precision. # If they are passed in with values, don't format them because we # assume they are at the user's expected precision. # If the 'try' fails because self.index doesn't exist or is empty # then use the default or parameter values for STRT, STOP, and STEP. try: if STRT is None: STRT = fmt % self.index[0] if STOP is None: STOP = fmt % self.index[-1] if STEP is None: # prevents an error being thrown in the case of only a single sample being written if STOP != STRT: raw_step = self.index[1] - self.index[0] STEP = fmt % raw_step except IndexError: pass self.well["STRT"].value = STRT self.well["STOP"].value = STOP self.well["STEP"].value = STEP
[docs] def update_units_from_index_curve(self): """Align STRT/STOP/STEP header item units with the index curve's units.""" # Check units if self.curves and self.curves[0].unit: unit = self.curves[0].unit else: unit = self.well["STRT"].unit self.well["STRT"].unit = unit self.well["STOP"].unit = unit self.well["STEP"].unit = unit # Check that curves exists to avoid throwing an expection. # to write to an non-existant object. if self.curves: self.curves[0].unit = unit
[docs] def write(self, file_ref, **kwargs): """Write LAS file to disk. Arguments: file_ref (open :term:`file-like object` or :class:`str`): either a file-like object open for writing, or a filename. All ``**kwargs`` are passed to :func:`lasio.writer.write` -- please check the docstring of that function for more keyword arguments you can use here! Examples: >>> import lasio >>> las = lasio.read("tests/examples/sample.las") >>> with open('test_output.las', mode='w') as f: ... las.write(f, version=2.0) # <-- this method """ opened_file = False if isinstance(file_ref, basestring) and not hasattr(file_ref, "write"): opened_file = True file_ref = open(file_ref, "w") writer.write(self, file_ref, **kwargs) if opened_file: file_ref.close()
    def to_excel(self, filename):
        """Export LAS file to a Microsoft Excel workbook.

        Arguments:
            filename (str): a name for the file to be created and written to.

        """
        # Imported lazily so the Excel export dependencies are only required
        # when this method is actually used.
        from . import excel

        converter = excel.ExcelConverter(self)
        converter.write(filename)
[docs] def to_csv(self, file_ref, mnemonics=True, units=True, units_loc="line", **kwargs): r"""Export to a CSV file. Arguments: file_ref (open :term:`file-like object` or :class:`str`): either a file-like object open for writing, or a filename. Keyword Arguments: mnemonics (list, True, False): write mnemonics as a header line at the start. If list, use the supplied items as mnemonics. If True, use the curve mnemonics. units (list, True, False): as for mnemonics. units_loc (str or None): either 'line', '[]' or '()'. 'line' will put units on the line following the mnemonics (good for WellCAD). '[]' and '()' will put the units in either brackets or parentheses following the mnemonics, on the single header line (better for Excel) **kwargs: passed to :class:`csv.writer`. Note that if ``lineterminator`` is **not** specified here, then it will be sent to :class:`csv.writer` as ``lineterminator='\\n'``. """ opened_file = False if isinstance(file_ref, basestring) and not hasattr(file_ref, "write"): opened_file = True file_ref = open(file_ref, "w") if "lineterminator" not in kwargs: kwargs["lineterminator"] = "\n" writer = csv.writer(file_ref, **kwargs) if mnemonics is True: mnemonics = [c.original_mnemonic for c in self.curves] if units is True: units = [c.unit for c in self.curves] if mnemonics: if units_loc in ("()", "[]") and units: mnemonics = [ m + " " + units_loc[0] + u + units_loc[1] for m, u in zip(mnemonics, units) ] writer.writerow(mnemonics) if units: if units_loc == "line": writer.writerow(units) for i in range(self.data.shape[0]): writer.writerow(self.data[i, :]) if opened_file: file_ref.close()
[docs] def match_raw_section(self, pattern, re_func="match", flags=re.IGNORECASE): """Find raw section with a regular expression. Arguments: pattern (str): regular expression (you need to include the tilde) Keyword Arguments: re_func (str): either "match" or "search", see python ``re`` module. flags (int): flags for :func:`re.compile` Returns: dict Intended for internal use only. """ for title in self.raw_sections.keys(): title = title.strip() p = re.compile(pattern, flags=flags) if re_func == "match": re_func = re.match elif re_func == "search": re_func = re.search m = re_func(p, title) if m: return self.raw_sections[title]
[docs] def get_curve(self, mnemonic): """Return CurveItem object. Arguments: mnemonic (str): the name of the curve Returns: :class:`lasio.CurveItem` (not just the data array) """ for curve in self.curves: if curve.mnemonic == mnemonic: return curve
[docs] def __getitem__(self, key): """Provide access to curve data. Arguments: key (str, int): either a curve mnemonic or the column index. Returns: 1D :class:`numpy.ndarray` (the data for the curve) """ # TODO: If I implement 2D arrays, need to check here for :1 :2 :3 etc. curve_mnemonics = [c.mnemonic for c in self.curves] if isinstance(key, int): return self.curves[key].data elif key in curve_mnemonics: return self.curves[key].data else: raise KeyError("{} not found in curves ({})".format(key, curve_mnemonics))
    def __setitem__(self, key, value):
        """Add, update or replace a curve.

        Arguments:
            key (str): the curve mnemonic
            value (1D data or CurveItem): either the curve data, or a CurveItem

        Raises:
            KeyError: when a CurveItem is supplied whose mnemonic does not
                match ``key``.

        See :meth:`lasio.LASFile.append_curve_item` or
        :meth:`lasio.LASFile.append_curve` for more details.
        """
        if isinstance(value, CurveItem):
            if key != value.mnemonic:
                raise KeyError(
                    "key {} does not match value.mnemonic {}".format(
                        key, value.mnemonic
                    )
                )
            if key in self.curves.keys():
                # Replace the existing CurveItem in place.
                ix = self.curves.keys().index(key)
                self.replace_curve_item(ix, value)
            else:
                self.append_curve_item(value)
        else:
            # Assume value is an ndarray
            if key in self.curves.keys():
                self.update_curve(mnemonic=key, data=value)
            else:
                self.append_curve(key, value)
[docs] def keys(self): """Return curve mnemonics.""" return [c.mnemonic for c in self.curves]
[docs] def values(self): """Return data for each curve.""" return [c.data for c in self.curves]
[docs] def items(self): """Return mnemonics and data for all curves.""" return [(c.mnemonic, c.data) for c in self.curves]
    def iterkeys(self):
        # Python-2-style iterator over curve mnemonics.
        return iter(list(self.keys()))

    def itervalues(self):
        # Python-2-style iterator over curve data arrays.
        return iter(list(self.values()))

    def iteritems(self):
        # Python-2-style iterator over (mnemonic, data) pairs.
        return iter(list(self.items()))

    @property
    def version(self):
        """Header information from the Version (~V) section.

        Returns:
            :class:`lasio.SectionItems` object.

        """
        return self.sections["Version"]

    @version.setter
    def version(self, section):
        self.sections["Version"] = section

    @property
    def well(self):
        """Header information from the Well (~W) section.

        Returns:
            :class:`lasio.SectionItems` object.

        """
        return self.sections["Well"]

    @well.setter
    def well(self, section):
        self.sections["Well"] = section

    @property
    def curves(self):
        """Curve information and data from the Curves (~C) and data section.

        Returns:
            :class:`lasio.SectionItems` object.

        """
        return self.sections["Curves"]

    @curves.setter
    def curves(self, section):
        self.sections["Curves"] = section

    @property
    def curvesdict(self):
        """Curve information and data from the Curves (~C) and data section.

        Returns:
            dict mapping each curve's mnemonic to its CurveItem.

        """
        d = {}
        for curve in self.curves:
            d[curve["mnemonic"]] = curve
        return d

    @property
    def params(self):
        """Header information from the Parameter (~P) section.

        Returns:
            :class:`lasio.SectionItems` object.

        """
        return self.sections["Parameter"]

    @params.setter
    def params(self, section):
        self.sections["Parameter"] = section

    @property
    def other(self):
        """Header information from the Other (~O) section.

        Returns:
            str

        """
        return self.sections["Other"]

    @other.setter
    def other(self, section):
        self.sections["Other"] = section

    @property
    def header(self):
        """All header information.

        Returns:
            dict of all sections keyed by section name.

        """
        return self.sections
[docs] def df(self): """Return data as a :class:`pandas.DataFrame` structure. The first Curve of the LASFile object is used as the pandas DataFrame's index. """ import pandas as pd from pandas.api.types import is_object_dtype df = pd.DataFrame(self.data, columns=[c.mnemonic for c in self.curves]) for column in df.columns: if is_object_dtype(df[column].dtype): try: df[column] = df[column].astype(np.float64) except ValueError: pass if len(self.curves) > 0: df = df.set_index(self.curves[0].mnemonic) return df
@property def data(self): return np.vstack([c.data for c in self.curves]).T @data.setter def data(self, value): return self.set_data(value)
[docs] def set_data(self, array_like, names=None, truncate=False): """Set the data for the LAS; actually sets data on individual curves. Arguments: array_like (array_like or :class:`pandas.DataFrame`): 2-D data array Keyword Arguments: names (list, optional): used to replace the names of the existing :class:`lasio.CurveItem` objects. truncate (bool): remove any columns which are not included in the Curves (~C) section. Note: you can pass a :class:`pandas.DataFrame` to this method. If you do this, the index of the DataFrame will be used as the first curve in the LAS file (i.e. it will not be discarded). """ try: import pandas as pd except ImportError: pass else: if isinstance(array_like, pd.DataFrame): return self.set_data_from_df( array_like, **dict(names=names, truncate=False) ) data = np.asarray(array_like) # Truncate data array if necessary. if truncate: data = data[:, len(self.curves)] # Extend curves list if necessary. while data.size > 0 and (data.shape[1] > len(self.curves)): self.curves.append(CurveItem("")) if not names: names = [c.original_mnemonic for c in self.curves] else: # Extend names list if necessary. while len(self.curves) > len(names): names.append("") logger.debug("set_data. names to use: {}".format(names)) if data.size > 0: for i, curve in enumerate(self.curves): curve.mnemonic = names[i] curve.data = data[:, i] self.curves.assign_duplicate_suffixes()
[docs] def set_data_from_df(self, df, **kwargs): """Set the LAS file data from a :class:`pandas.DataFrame`. Arguments: df (pandas.DataFrame): curve mnemonics are the column names. The depth column for the curves must be the index of the DataFrame. Keyword arguments are passed to :meth:`lasio.LASFile.set_data`. """ df_values = np.vstack([df.index.values, df.values.T]).T if ("names" not in kwargs) or (not kwargs["names"]): kwargs["names"] = [df.index.name] + [ str(name) for name in df.columns.values ] self.set_data(df_values, **kwargs)
[docs] def stack_curves(self, mnemonic, sort_curves=True): """Stack multi-channel curve data to a numpy 2D ndarray. Provide a stub name (prefix shared by all curves that will be stacked) or a list of curve mnemonic strings. Keyword Arguments: mnemonic (str or list): Supply the first several characters of the channel set to be stacked. Alternatively, supply a list of the curve names (mnemonics strings) to be stacked. sort_curves (bool): Natural sort curves based on mnemonic prior to stacking. Returns: 2-D numpy array """ if isinstance(mnemonic, np.ndarray): mnemonic = list(mnemonic) if (not mnemonic) or (not all([i for i in mnemonic])): raise ValueError("`mnemonic` must not contain empty element") keys = self.curves.keys() if isinstance(mnemonic, str): channels = [i for i in keys if i.startswith(mnemonic)] or [mnemonic] elif isinstance(mnemonic, Sequence): channels = list(mnemonic) else: raise TypeError("`mnemonic` argument must be string or sequence") print(channels) if not set(keys).issuperset(set(channels)): missing = ", ".join(set(channels).difference(set(keys))) raise KeyError("{} not found in LAS curves.".format(missing)) if sort_curves: # Sort the channel list by numbers in the element strings # Example: ['CBP2', 'CBP1'] > ['CBP1', 'CBP2'] channels.sort(key=lambda x: [ int(i) if i.isdigit() else i for i in re.split(r"(\d+)", x) ]) indices = [keys.index(i) for i in channels] return self.data[:, indices]
@property def index(self): """Return data from the first column of the LAS file data (depth/time).""" return self.curves[0].data @property def depth_m(self): """Return the index as metres.""" if self._index_unit_contains("M"): return self.index elif self._index_unit_contains("F"): return self.index * 0.3048 elif self._index_unit_contains(".1IN"): return (self.index / 120) * 0.3048 else: raise exceptions.LASUnknownUnitError("Unit of depth index not known") @property def depth_ft(self): """Return the index as feet.""" if self._index_unit_contains("M"): return self.index / 0.3048 elif self._index_unit_contains("F"): return self.index elif self._index_unit_contains(".1IN"): return self.index / 120 else: raise exceptions.LASUnknownUnitError("Unit of depth index not known") def _index_unit_contains(self, unit_code): """Check value of index_unit string, ignore case. Args: unit_code (string): e.g. 'M' or 'FT' """ return self.index_unit and (unit_code.upper() in self.index_unit.upper())
def append_curve_item(self, curve_item):
    """Add a CurveItem to the end of the curves section.

    Args:
        curve_item (lasio.CurveItem)

    """
    # Appending is just inserting at the current end position.
    end = len(self.curves)
    self.insert_curve_item(end, curve_item)
def insert_curve_item(self, ix, curve_item):
    """Insert a CurveItem.

    Args:
        ix (int): position to insert CurveItem i.e. 0 for start
        curve_item (lasio.CurveItem)

    """
    # Only genuine CurveItem objects may live in the curves section.
    is_curve = isinstance(curve_item, CurveItem)
    assert is_curve
    self.curves.insert(ix, curve_item)
def replace_curve_item(self, ix, curve_item):
    """Replace a CurveItem.

    Args:
        ix (int): position to insert CurveItem i.e. 0 for start
        curve_item (lasio.CurveItem)

    """
    # Drop the curve currently at this position, then splice the
    # replacement in at the same slot.
    self.delete_curve(ix=ix)
    self.insert_curve_item(ix, curve_item)
def append_curve(self, mnemonic, data, unit="", descr="", value=""):
    """Add a curve to the end of the curves section.

    Arguments:
        mnemonic (str): the curve mnemonic
        data (1D ndarray): the curve data

    Keyword Arguments:
        unit (str): curve unit
        descr (str): curve description
        value (int/float/str): value e.g. API code.

    """
    # Delegate to insert_curve with the end-of-list position.
    position = len(self.curves)
    return self.insert_curve(position, mnemonic, data, unit, descr, value)
def insert_curve(self, ix, mnemonic, data, unit="", descr="", value=""):
    """Insert a curve.

    Arguments:
        ix (int): position to insert curve at i.e. 0 for start.
        mnemonic (str): the curve mnemonic
        data (1D ndarray): the curve data

    Keyword Arguments:
        unit (str): curve unit
        descr (str): curve description
        value (int/float/str): value e.g. API code.

    """
    # Build the CurveItem first, then hand it to insert_curve_item,
    # which performs the type check before insertion.
    new_curve = CurveItem(mnemonic, unit, value, descr, data)
    self.insert_curve_item(ix, new_curve)
def delete_curve(self, mnemonic=None, ix=None):
    """Delete a curve.

    Keyword Arguments:
        ix (int): index of curve in LASFile.curves.
        mnemonic (str): mnemonic of curve.

    The index takes precedence over the mnemonic.

    """
    # Resolve the mnemonic to a positional index only when no explicit
    # index was supplied.
    position = ix if ix is not None else self.curves.keys().index(mnemonic)
    self.curves.pop(position)
def update_curve(self, mnemonic=None, data=False, **kwargs):
    """Update a curve.

    Keyword Arguments:
        mnemonic (str): mnemonic of curve.
        data (ndarray): new data array (False if no update desired)
        ix (int): index of curve in LASFile.curves.
        unit (str): new value for unit (False if no update desired)
        descr (str): new description (False if no update desired)
        value (str/int/float etc): new value (False if no update desired)

    The index takes precedence over the mnemonic.

    """
    ix = kwargs.get("ix", None)
    if ix is None:
        ix = self.curves.keys().index(mnemonic)
    curve = self.curves[ix]
    # False is the "leave unchanged" sentinel for every field, so falsy
    # values such as None or "" can still be assigned deliberately.
    updates = {
        "data": data,
        "unit": kwargs.get("unit", False),
        "descr": kwargs.get("descr", False),
        "value": kwargs.get("value", False),
    }
    for attr, new_value in updates.items():
        if new_value is not False:
            setattr(curve, attr, new_value)

@property
def json(self):
    """Return object contents as a JSON string."""
    return self.to_json()
def to_json(self):
    """Serialize this LASFile to a JSON string using the module's
    LAS-aware JSONEncoder."""
    encoded = json.dumps(self, cls=JSONEncoder)
    return encoded
@json.setter
def json(self, value):
    # The `json` property is read-only: the serialized form cannot be
    # parsed back into a LASFile here, so any assignment is rejected.
    raise Exception("Cannot set objects from JSON")
class Las(LASFile):

    """LAS file object.

    Retained for backwards compatibility.

    """

    pass


class JSONEncoder(json.JSONEncoder):
    """Extend json.JSONEncoder for LAS specific Json output."""

    def default(self, obj):
        """Convert a LASFile into a JSON-serializable dict.

        The result has two top-level keys: "metadata" (header sections,
        keyed by section name) and "data" (curve data lists, keyed by
        curve mnemonic, with NaN replaced by None).
        """
        if isinstance(obj, LASFile):
            d = {"metadata": {}, "data": {}}
            for name, section in obj.sections.items():
                if isinstance(section, basestring):
                    d["metadata"][name] = section
                else:
                    try:
                        d["metadata"][name] = section.dictview()
                    except AttributeError:
                        # BUGFIX: the list must exist before items can be
                        # appended to it; previously this branch raised
                        # KeyError for any section without dictview().
                        d["metadata"][name] = []
                        for item in section:
                            d["metadata"][name].append(dict(item))
            for curve in obj.curves:
                d["data"][curve.mnemonic] = [
                    None if np.isnan(x) else x for x in curve.data
                ]
            return d