Skip to content

CLN: use stdlib Iterator instead of BaseIterator #30370

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Dec 23, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 4 additions & 15 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import bz2
import codecs
from collections.abc import Iterator
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be failing CI on the "Check for non-standard imports" step.

log shows

##[error]pandas/io/stata.py:12:from collections.abc import Iterator
##[error]pandas/io/sas/sas7bdat.py:16:from collections.abc import Iterator
##[error]pandas/io/sas/sas_xport.py:10:from collections.abc import Iterator
##[error]pandas/io/common.py:5:from collections.abc import Iterator
##[error]pandas/io/parsers.py:6:from collections.abc import Iterator
##[error]pandas/io/json/_json.py:2:from collections.abc import Iterator

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There were changes to invgrep in #29915; this may be related. @datapythonista

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's strange, because I tested that the exit codes were correct in that PR, but I agree the problem is likely to have been introduced in that PR.

I'm travelling without my computer at the moment, so I can't test it myself. But feel free to revert that PR if you identify it as the one causing the problem, and I'll redo it with the problem fixed later.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is the preferred way to do the import?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the correct way to do the import? I'm getting this failure in my PR https://github.com/pandas-dev/pandas/pull/30151/checks?check_run_id=362567629

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the import is correct. There must be something wrong with the invgrep, as noted by @simonjayhawkins.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alimcmaster1 might have an idea here as well

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. From reading #25957 (review), where this check was implemented, our preferred way to do the import is `from collections import abc`, or `from collections import Iterator`.

I can update this now if helpful @mroeschke ?

Agreed - there seems to be something flaky with the invgrep on GitHub Actions, since the check fails for me locally.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That'd be great @alimcmaster1, thanks!

import gzip
from io import BufferedIOBase, BytesIO
import mmap
Expand Down Expand Up @@ -49,18 +50,6 @@
_VALID_URLS.discard("")


class BaseIterator:
"""Subclass this and provide a "__next__()" method to obtain an iterator.
Useful only when the object being iterated is non-reusable (e.g. OK for a
parser, not for an in-memory table, yes for its iterator)."""

def __iter__(self) -> "BaseIterator":
return self

def __next__(self):
raise AbstractMethodError(self)


def is_url(url) -> bool:
"""
Check to see if a URL has a valid protocol.
Expand Down Expand Up @@ -515,7 +504,7 @@ def closed(self):
return self.fp is None


class _MMapWrapper(BaseIterator):
class _MMapWrapper(Iterator):
"""
Wrapper for the Python's mmap class so that it can be properly read in
by Python's csv.reader class.
Expand Down Expand Up @@ -552,7 +541,7 @@ def __next__(self) -> str:
return newline


class UTF8Recoder(BaseIterator):
class UTF8Recoder(Iterator):
"""
Iterator that reads an encoded stream and re-encodes the input to UTF-8
"""
Expand All @@ -566,7 +555,7 @@ def read(self, bytes: int = -1) -> bytes:
def readline(self) -> bytes:
return self.reader.readline().encode("utf-8")

def next(self) -> bytes:
def __next__(self) -> bytes:
return next(self.reader).encode("utf-8")

def close(self):
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/json/_json.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from collections import OrderedDict
from collections.abc import Iterator
import functools
from io import StringIO
from itertools import islice
Expand All @@ -19,7 +20,6 @@
from pandas.core.reshape.concat import concat

from pandas.io.common import (
BaseIterator,
get_filepath_or_buffer,
get_handle,
infer_compression,
Expand Down Expand Up @@ -616,7 +616,7 @@ def read_json(
return result


class JsonReader(BaseIterator):
class JsonReader(Iterator):
"""
JsonReader provides an interface for reading in a JSON file.

Expand Down
6 changes: 3 additions & 3 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

from collections import defaultdict
from collections.abc import Iterator
import csv
import datetime
from io import StringIO
Expand Down Expand Up @@ -62,7 +63,6 @@
from pandas.core.tools import datetimes as tools

from pandas.io.common import (
BaseIterator,
UTF8Recoder,
get_filepath_or_buffer,
get_handle,
Expand Down Expand Up @@ -786,7 +786,7 @@ def read_fwf(
return _read(filepath_or_buffer, kwds)


class TextFileReader(BaseIterator):
class TextFileReader(Iterator):
"""

Passed dialect overrides any of the related parser options
Expand Down Expand Up @@ -3582,7 +3582,7 @@ def _get_col_names(colspec, columns):
return colnames


class FixedWidthReader(BaseIterator):
class FixedWidthReader(Iterator):
"""
A reader of fixed-width lines.
"""
Expand Down
5 changes: 3 additions & 2 deletions pandas/io/sas/sas7bdat.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
Reference for binary data compression:
http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm
"""
from collections.abc import Iterator
from datetime import datetime
import struct

Expand All @@ -22,7 +23,7 @@

import pandas as pd

from pandas.io.common import BaseIterator, get_filepath_or_buffer
from pandas.io.common import get_filepath_or_buffer
from pandas.io.sas._sas import Parser
import pandas.io.sas.sas_constants as const

Expand All @@ -36,7 +37,7 @@ class _column:


# SAS7BDAT represents a SAS data file in SAS7BDAT format.
class SAS7BDATReader(BaseIterator):
class SAS7BDATReader(Iterator):
"""
Read SAS files in SAS7BDAT format.

Expand Down
6 changes: 3 additions & 3 deletions pandas/io/sas/sas_xport.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

https://support.sas.com/techsup/technote/ts140.pdf
"""

from collections.abc import Iterator
from datetime import datetime
from io import BytesIO
import struct
Expand All @@ -19,7 +19,7 @@

import pandas as pd

from pandas.io.common import BaseIterator, get_filepath_or_buffer
from pandas.io.common import get_filepath_or_buffer

_correct_line1 = (
"HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!"
Expand Down Expand Up @@ -251,7 +251,7 @@ def _parse_float_vec(vec):
return ieee


class XportReader(BaseIterator):
class XportReader(Iterator):
__doc__ = _xport_reader_doc

def __init__(
Expand Down
6 changes: 3 additions & 3 deletions pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
You can find more information on http://presbrey.mit.edu/PyDTA and
http://www.statsmodels.org/devel/
"""

from collections.abc import Iterator
import datetime
from io import BytesIO
import os
Expand Down Expand Up @@ -44,7 +44,7 @@
from pandas.core.frame import DataFrame
from pandas.core.series import Series

from pandas.io.common import BaseIterator, get_filepath_or_buffer, stringify_path
from pandas.io.common import get_filepath_or_buffer, stringify_path

_version_error = (
"Version of given Stata file is not 104, 105, 108, "
Expand Down Expand Up @@ -1010,7 +1010,7 @@ def __init__(self):
)


class StataReader(StataParser, BaseIterator):
class StataReader(StataParser, Iterator):
__doc__ = _stata_reader_doc

def __init__(
Expand Down