Converting a txt dump (from the searchTwitter REST API) into JSON with R
I've been using the searchTwitter function of the Twitter REST API to retrieve a number of tweets, and I've dumped them into a txt file. The structure of the txt file is:
"text" "favorited" "favoritecount" "replytosn" "created" "truncated" "replytosid" "id" "replytouid" "statussource" "screenname" "retweetcount" "isretweet" "retweeted" "longitude" "latitude" "1" "rt @kobebryant: last night final chapter incredible story. walk away @ peace knowing love game & city will…" false 0 na 2016-04-14 23:59:59 false na "720763566027096066" na "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">twitter iphone</a>" "jtlongway" 204125 true false na na "2" "rt @kobebryant: last night final chapter incredible story. walk away @ peace knowing love game & city will…" false 0 na 2016-04-14 23:59:59 false na "720763566014332928" na "<a href=\"http://twitter.com/download/android\" rel=\"nofollow\">twitter android</a>" "mr_wizrd" 204125 true false na na "3" "rt @magicjohnson: got chance know @kobebryant away court @ @dodgers game! #thankyoukobe #kb20 https://twitter.com/svsw…" false 0 na 2016-04-14 23:59:59 false na "720763563783110661" na "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">twitter iphone</a>" "tynashkobe" 777 true false na na
and I would like to have it in a JSON structure, i.e.
{"created_at":"wed apr 13 22:06:02 +0000 2016","id":720372500065071104,"id_str":"720372500065071104","text":"rt @staplescenter: @kobebryant hold final press conference tonight. #thankyoukobe https:\/\/t.co\/1rtiq5eas9","source":"\u003ca href=\"http:\/\/tweetlogix.com\" rel=\"nofollow\"\u003etweetlogix\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":149681225,"id_str":"149681225","name":"sp","screen_name":"mr_layedbak","location":"west side of detroit","url":null,"description":"unfollow me if you're offended","protected":false,"verified":false,"followers_count":4326,"friends_count":597,"listed_count":105,"favourites_count":371,"statuses_count":227845,"created_at":"sat may 29 23:21:29 +0000 2010","utc_offset":-14400,"time_zone":"eastern time (us & canada)","geo_enabled":true,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"131516","profile_background_image_url":"http:\/\/pbs.twimg.com\/profile_background_images\/736248613\/7d89d45f16e6c4e508a883aded1aac64.jpeg","profile_background_image_url_https":"https:\/\/pbs.twimg.com\/profile_background_images\/736248613\/7d89d45f16e6c4e508a883aded1aac64.jpeg","profile_background_tile":true,"profile_link_color":"141313","profile_sidebar_border_color":"000000","profile_sidebar_fill_color":"000000","profile_text_color":"660a0a","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/719706881736974341\/xt8r51s8_normal.jpg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/719706881736974341\/xt8r51s8_normal.jpg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/149681225\/1452265608","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweet
ed_status":{"created_at":"wed apr 13 21:37:36 +0000 2016","id":720365343500144640,"id_str":"720365343500144640","text":"this @kobebryant hold final press conference tonight. #thankyoukobe https:\/\/t.co\/1rtiq5eas9","source":"\u003ca href=\"http:\/\/twitter.com\/download\/iphone\" rel=\"nofollow\"\u003etwitter iphone\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":28725783,"id_str":"28725783","name":"staples center","screen_name":"staplescenter","location":"los angeles","url":"http:\/\/www.staplescenter.com","description":"sports , entertainment center of world located in downtown los angeles @lalive since 1999. instagram: @staplescenterla","protected":false,"verified":true,"followers_count":82891,"friends_count":10907,"listed_count":862,"favourites_count":1905,"statuses_count":11024,"created_at":"sat apr 04 03:04:17 +0000 2009","utc_offset":-25200,"time_zone":"pacific time (us & 
canada)","geo_enabled":true,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"131516","profile_background_image_url":"http:\/\/pbs.twimg.com\/profile_background_images\/553367185700036609\/q6kh8ru8.jpeg","profile_background_image_url_https":"https:\/\/pbs.twimg.com\/profile_background_images\/553367185700036609\/q6kh8ru8.jpeg","profile_background_tile":true,"profile_link_color":"009999","profile_sidebar_border_color":"ffffff","profile_sidebar_fill_color":"efefef","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/2394735481\/7rom2fzqu1vwrq94yzll_normal.jpeg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/2394735481\/7rom2fzqu1vwrq94yzll_normal.jpeg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/28725783\/1416251684","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"retweet_count":264,"favorite_count":439,"entities":{"hashtags":[{"text":"thankyoukobe","indices":[71,84]}],"urls":[],"user_mentions":[{"screen_name":"kobebryant","name":"kobe 
bryant","id":1059194370,"id_str":"1059194370","indices":[14,25]}],"symbols":[],"media":[{"id":720365333593260032,"id_str":"720365333593260032","indices":[85,108],"media_url":"http:\/\/pbs.twimg.com\/media\/cf9adelvaaa7bqm.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/cf9adelvaaa7bqm.jpg","url":"https:\/\/t.co\/1rtiq5eas9","display_url":"pic.twitter.com\/1rtiq5eas9","expanded_url":"http:\/\/twitter.com\/staplescenter\/status\/720365343500144640\/photo\/1","type":"photo","sizes":{"small":{"w":340,"h":425,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":1024,"h":1280,"resize":"fit"},"medium":{"w":600,"h":750,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":720365333593260032,"id_str":"720365333593260032","indices":[85,108],"media_url":"http:\/\/pbs.twimg.com\/media\/cf9adelvaaa7bqm.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/cf9adelvaaa7bqm.jpg","url":"https:\/\/t.co\/1rtiq5eas9","display_url":"pic.twitter.com\/1rtiq5eas9","expanded_url":"http:\/\/twitter.com\/staplescenter\/status\/720365343500144640\/photo\/1","type":"photo","sizes":{"small":{"w":340,"h":425,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":1024,"h":1280,"resize":"fit"},"medium":{"w":600,"h":750,"resize":"fit"}}}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en"},"is_quote_status":false,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"thankyoukobe","indices":[90,103]}],"urls":[],"user_mentions":[{"screen_name":"staplescenter","name":"staples center","id":28725783,"id_str":"28725783","indices":[3,17]},{"screen_name":"kobebryant","name":"kobe 
bryant","id":1059194370,"id_str":"1059194370","indices":[33,44]}],"symbols":[],"media":[{"id":720365333593260032,"id_str":"720365333593260032","indices":[104,127],"media_url":"http:\/\/pbs.twimg.com\/media\/cf9adelvaaa7bqm.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/cf9adelvaaa7bqm.jpg","url":"https:\/\/t.co\/1rtiq5eas9","display_url":"pic.twitter.com\/1rtiq5eas9","expanded_url":"http:\/\/twitter.com\/staplescenter\/status\/720365343500144640\/photo\/1","type":"photo","sizes":{"small":{"w":340,"h":425,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":1024,"h":1280,"resize":"fit"},"medium":{"w":600,"h":750,"resize":"fit"}},"source_status_id":720365343500144640,"source_status_id_str":"720365343500144640","source_user_id":28725783,"source_user_id_str":"28725783"}]},"extended_entities":{"media":[{"id":720365333593260032,"id_str":"720365333593260032","indices":[104,127],"media_url":"http:\/\/pbs.twimg.com\/media\/cf9adelvaaa7bqm.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/cf9adelvaaa7bqm.jpg","url":"https:\/\/twitter.com\/1rtiq5eas9","display_url":"pic.twitter.com\/1rtiq5eas9","expanded_url":"http:\/\/twitter.com\/staplescenter\/status\/720365343500144640\/photo\/1","type":"photo","sizes":{"small":{"w":340,"h":425,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"large":{"w":1024,"h":1280,"resize":"fit"},"medium":{"w":600,"h":750,"resize":"fit"}},"source_status_id":720365343500144640,"source_status_id_str":"720365343500144640","source_user_id":28725783,"source_user_id_str":"28725783"}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1460585162546"}
I've been trying to load the txt file with read.csv(file, header = TRUE, sep = ""),
and the first problem I've found is that, since the txt file uses white space as the separator, I get an error saying there are more columns in the rows than in the header (of course, since I'm trying to process the text of tweets).
If I don't specify the separator (i.e. read.csv(file)),
it dumps the content into a data frame, but with only one column.
Any hints?
You could do something like this:
library(jsonlite)

# Read the raw dump; each element of `txt` is one line of the file.
txt <- readLines("myfile.txt")

# The `created` timestamp (e.g. 2016-04-14 23:59:59) is written without
# quotes, so whitespace splitting would break it into two fields and the
# rows would no longer line up with the header. Wrap the first timestamp on
# each line in double quotes; the capture group is what `\\1` refers to.
# NOTE: this can misfire if a similar date-time pattern appears earlier on
# the line, e.g. inside the tweet text.
txt_quoted <- sub("(\\d+-\\d+-\\d+ \\d+:\\d+:\\d+)", '"\\1"', txt)

# Parse the whitespace-separated, quoted fields; the first line is the header.
df <- read.table(text = txt_quoted, header = TRUE)

# Serialise the data frame as a JSON array with one object per row.
toJSON(df)
# [{"text":"rt @kobebryant: last night final chapter incredible story. walk ...
The problems arise because the datetime column created
is not wrapped in quotes. Thus, the date and the time get separated, and the number of header fields does not match the number of fields in the rows anymore. (This simple approach may break if there are, for example, similar patterns in the text
column.)
Comments
Post a Comment