1. The problem
The relevant part of the code is shown below.
app.py
extractor = twitter_setup()

search_words = "Trump" + " -filter:retweets"
date_since = "2019-04-25"

tweets = tw.Cursor(extractor.search, q=search_words, lang="en", since=date_since).items(5)

print("Number of tweets extracted: {}.\n".format(len(tweets)))

for tweet in tweets:
    print(tweet.text)

tweet_text = pd.DataFrame(data=[tweet.text for tweet in tweets], columns=['Twwet'])
print(tweet_text)
Output:
Traceback (most recent call last):
File "C:\app.py", line 28, in <module>
print("Number of tweets extracted: {}.\n".format(len(tweets)))
TypeError: object of type 'ItemIterator' has no len()
File "C:\app.py", line 28, in <module>
print("Number of tweets extracted: {}.\n".format(len(tweets)))
TypeError: object of type 'ItemIterator' has no len()
If this line is removed,
print("Number of tweets extracted: {}.\n".format(len(tweets)))
then the rest of the script, starting from
for tweet in tweets:
    print(tweet.text)
runs and produces the following output:
Output:
Peter Funt, who also wrote that "Trump should get credit for his awesomeness," in a different opinion piece, wrote… https://t.co/TlQEfzDUPp
@nadeema @Emeraldleah @JoeBiden What planet are you living on?
He is still extremely popular and would easily dust Trump
Excited for the first animated film to make a Trump joke
@RonMatthews82 @Steigerworld Saudi Arabia is home to some of the most radical Islamist extremist, dude. Trump is da… https://t.co/8V7L4i4gxM
i wanna vent somewhere but i think theres a trump supporter or two in every server or twitter i have and i dont wan… https://t.co/beijxcUkyz
Empty DataFrame
Columns: [Twwet]
Index: []
So the .text attribute stored in each tweet can be printed out, but the data cannot be turned into a pandas DataFrame.
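The root cause is that Cursor(...).items(5) returns a one-shot ItemIterator: it has no __len__, and once the for loop has consumed it, the later list comprehension sees nothing, which is why the DataFrame comes out empty. A minimal sketch of a fix, assuming the same twitter_setup(), search_words and date_since as above, is to materialize the iterator into a plain list once and reuse that list:

import pandas as pd
import tweepy as tw

extractor = twitter_setup()
search_words = "Trump" + " -filter:retweets"
date_since = "2019-04-25"

# Drain the one-shot ItemIterator into a list; a list supports len() and repeated iteration.
tweets = list(tw.Cursor(extractor.search, q=search_words, lang="en", since=date_since).items(5))

print("Number of tweets extracted: {}.\n".format(len(tweets)))

for tweet in tweets:
    print(tweet.text)

# The list has not been consumed, so the DataFrame is no longer empty.
tweet_text = pd.DataFrame(data=[tweet.text for tweet in tweets], columns=['Tweet'])
print(tweet_text)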
2. Investigation
Some investigation shows that this 'ItemIterator' is a class defined in cursor.py of the tweepy package.
cursor.py
class ItemIterator(BaseIterator):

    def __init__(self, page_iterator):
        self.page_iterator = page_iterator
        self.limit = 0
        self.current_page = None
        self.page_index = -1
        self.num_tweets = 0

    def next(self):
        if self.limit > 0:
            if self.num_tweets == self.limit:
                raise StopIteration
        if self.current_page is None or self.page_index == len(self.current_page) - 1:
            # Reached end of current page, get the next page...
            self.current_page = self.page_iterator.next()
            self.page_index = -1
        self.page_index += 1
        self.num_tweets += 1
        return self.current_page[self.page_index]

    def prev(self):
        if self.current_page is None:
            raise TweepError('Can not go back more, at first page')
        if self.page_index == 0:
            # At the beginning of the current page, move to next...
            self.current_page = self.page_iterator.prev()
            self.page_index = len(self.current_page)
        if self.page_index == 0:
            raise TweepError('No more items')
        self.page_index -= 1
        self.num_tweets -= 1
        return self.current_page[self.page_index]
That is how the class is defined.
>>> type(tweets)
<class 'tweepy.cursor.ItemIterator'>
>>> print(tweets[0])
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: 'ItemIterator' object does not support indexing  <== this one comes up a lot.
>>> tweets.__dict__.keys()
dict_keys(['page_iterator', 'limit', 'current_page', 'page_index', 'num_tweets'])
>>> print(dir(tweets))
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__next__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'current_page', 'limit', 'next', 'num_tweets', 'page_index', 'page_iterator', 'prev']
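In other words, the dir() output shows __iter__ and __next__ but no __len__ or __getitem__: an ItemIterator can be looped over or drained with next(), but it cannot be indexed or measured. A quick check, assuming tweets is a freshly created Cursor(...).items(5):

import itertools

print(hasattr(tweets, '__len__'))      # False -> len(tweets) raises TypeError
print(hasattr(tweets, '__getitem__'))  # False -> tweets[0] raises TypeError
print(hasattr(tweets, '__next__'))     # True  -> it is an iterator

first = next(tweets)                        # pulls one item from the cursor
print(first.text)
for tweet in itertools.islice(tweets, 2):   # pulls the next two items
    print(tweet.text)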
For a comparison with the <class 'tweepy.models.ResultSet'> used in the earlier post "Organizing fetched tweepy data with pandas in Python", see:
https://mycodingjp.blogspot.com/2019/04/pythonpandastweepy.html
search_words = "Trump"
since=date_since = "2019-04-25"
tweets = tw.Cursor(extractor.search, q=search_words, lang="en", since=date_since).items(5)
print([tweet.text for tweet in tweets])
tweets = tw.Cursor(extractor.search, q=search_words, lang="en", since=date_since).items(5)
users_locs = [[tweet.user.screen_name, tweet.user.location] for tweet in tweets]
for users_loc in users_locs:
print(users_loc)
The tweets cursor has to be redefined every time it is consumed: unless it is re-created right before building users_locs, nothing gets printed. (A single-pass alternative is sketched after the output below.)
["RT @johnlundin: CNN's Jim Acosta rips Trump after he refuses to defend his repeated 'coup' cl
aims: 'I don't think he understands what he's…", 'RT @gatewaypundit: Trump Announces Withdrawa
l From U.N. Small Arms Treaty -- Signs Doc in Front of NRA, Tosses Pen Into Crowd (VIDEO) https
…', "RT @ddale8: As reported, Trump announces he will revoke the US signature from the global
arms-trade treaty. People cheer. He says he's impr…", 'RT @RudyGiuliani: The article below is
one of a number showing a possible conspiracy(collusion)between DNC and Clinton operatives and
Ukrai…', 'RT @robreiner: Every day Trump prevents anyone from his administration from testifyi
ng to Congress is another day he commits obstruction of…']
['JoeEbert11', 'New Hampshire, USA']
['GmMaklinesr', 'Virginia']
['gfeinholtz', 'Tucson']
['joseph_feeny', 'Ohio, USA']
['moreresistance2', 'California, USA']
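One way to avoid redefining the cursor for every use, assuming the same extractor, search_words and date_since as above, is to make a single pass over it and collect everything needed along the way:

tweets = tw.Cursor(extractor.search, q=search_words, lang="en", since=date_since).items(5)

users_locs = []
for tweet in tweets:
    print(tweet.text)                                                   # first use of the item
    users_locs.append([tweet.user.screen_name, tweet.user.location])    # second use, same pass

for users_loc in users_locs:
    print(users_loc)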
3. Fix
Check the installed tweepy version and the Python 3 version.
>pip freeze
tweepy 3.7.0
tweepy versions and support status:
https://github.com/tweepy/tweepy
Python 2.7, 3.4, 3.5, 3.6, & 3.7 are supported.
A tutorial posted about half a year ago said that tweepy 3.6 was the latest release and that Python 3.7 was not yet supported, but the version installed here (3.7.0) already appears to support Python 3.7.
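The same versions can also be checked from inside Python; a small sketch (tweepy exposes a __version__ attribute):

import sys
import tweepy

print("tweepy :", tweepy.__version__)   # e.g. 3.7.0
print("Python :", sys.version)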
search_words = "Trump"
since=date_since = "2019-04-25"
tweets = tw.Cursor(extractor.search, q=search_words, lang="en", since=date_since).items(5)
users_locs = [[tweet.user.screen_name, tweet.user.location] for tweet in tweets]
for users_loc in users_locs:
print(users_loc)
tweet_text = pd.DataFrame(data=users_locs,columns=['User','Location'])
print(tweet_text)
['60andmoonwalkin', 'United States']
['CritterFL', 'Pittsburgh, PA']
['usmaan_aisha', '']
['alahley', 'Bend, Oregon']
['debos111', '']
User Location
0 60andmoonwalkin United States
1 CritterFL Pittsburgh, PA
2 usmaan_aisha
3 alahley Bend, Oregon
4 debos111
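The same pattern can be wrapped in a small helper that consumes the cursor exactly once and returns a DataFrame containing the user, location and tweet text together, which also brings back the count from section 1. This is a sketch under the same assumptions as above; search_to_dataframe and its column names are hypothetical, not part of tweepy:

def search_to_dataframe(api, query, since, n=5):
    # Hypothetical helper: run the cursor once and return the rows as a DataFrame.
    cursor = tw.Cursor(api.search, q=query, lang="en", since=since).items(n)
    rows = [[t.user.screen_name, t.user.location, t.text] for t in cursor]
    return pd.DataFrame(rows, columns=['User', 'Location', 'Tweet'])

df = search_to_dataframe(extractor, search_words, date_since)
print("Number of tweets extracted: {}.\n".format(len(df)))
print(df)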