library(devtools)
library(RColorBrewer)
library(formattable)
library(ggmap)
library(osmar)
library(RSQLite)
library(sqldf)
library(jsonlite)
library(mongolite)
library(plotly)
library(osmplotr)
library(geojsonio)
library(DT)
# Not installed
# library(RMongo)
# Not used
library(mapview)
library(bigmemory)
library(rio)
library(dygraphs)
library(highcharter)
library(rbokeh)
library(maps)
library(R2HTML)
# Plot size for the notebook's repr output.
options(repr.plot.height = 9, repr.plot.width = 9)

# Fetch an OpenStreetMap base layer. The four numbers are passed as a
# bounding box (left, bottom, right, top per ggmap's get_map()).
osmmap <- get_map(
  source = "osm",
  location = c(53.58, 23.735, 56.887, 26.539)
)
ggmap(osmmap, extent = "normal")
Besides simply displaying maps, the ggmap package offers other features; some examples of its use are shown below.
# Geocode "Jumerah" with the extended output and show it as a table.
gc01 <- geocode("Jumerah", output = "more")
formattable(data.frame(gc01))

# Same place, coerced to a plain numeric vector so it can be passed
# straight to revgeocode().
gc02 <- as.numeric(geocode("Jumerah"))
gc02
formattable(data.frame(revgeocode(gc02, output = "more")))

# Airport coordinates. The reverse-geocoding call for gc04 originally ran
# before gc04 was assigned (object-not-found error); it now follows the
# assignment.
gc04 <- as.numeric(geocode("Dubai International Airport"))
gc04
formattable(data.frame(revgeocode(gc04, output = "more")))

# Distances between pairs of places, as reported by mapdist().
formattable(data.frame(mapdist("dubai", "abu-dhabi")))
formattable(data.frame(mapdist("Jumerah", "Dubai International Airport")))

# Extended geocoding output for the two route endpoints used further below.
geocode("Burj Khalifa", output = "more")
geocode("Business Bay", output = "more")
# Ask for routes (including alternatives) between the two landmarks and
# preview the first rows of the result.
var_ways <- route(from = "Burj Khalifa", to = "Business Bay", alternatives = TRUE)
formattable(head(data.frame(var_ways)))

# Aesthetic mapping shared by every route-leg layer below: one segment per
# leg, coloured by route.
leg_aes <- aes(
  x = startLon, xend = endLon,
  y = startLat, yend = endLat,
  color = route
)

# 1) Bare legs without a map background.
options(repr.plot.height = 5, repr.plot.width = 5)
ggplot(data = var_ways) +
  geom_leg(leg_aes) +
  coord_map()

# 2) One roadmap panel per route.
options(repr.plot.height = 4, repr.plot.width = 10)
qmap(
  location = c(55.2820, 25.1900),
  zoom = 15,
  maptype = "roadmap",
  base_layer = ggplot(aes(x = startLon, y = startLat), data = var_ways)
) +
  geom_leg(leg_aes, alpha = 0.5, size = 2, data = var_ways) +
  labs(x = "Longitude", y = "Latitude", colour = "Route") +
  facet_wrap(~ route, ncol = 3) +
  theme(legend.position = "top")

# 3) All routes together on a hybrid Google map.
options(repr.plot.height = 10, repr.plot.width = 10)
way_map <- get_map(
  location = c(55.2820, 25.1900),
  source = "google",
  zoom = 15,
  maptype = "hybrid"
)
ggmap(way_map) +
  geom_leg(mapping = leg_aes, data = var_ways, alpha = 0.7, size = 2)
There are several ways to extract geodata. One of them is to run the R code in the following cell.
This set of commands allows us to download the data for a given set of coordinates.
# Use the OSM API as the data source for both downloads.
src <- osmsource_api()

# Small box (1000 x 1000) around the chosen centre point.
smallbox <- center_bbox(
  center_lon = 55.2708, center_lat = 25.2048,
  width = 1000, height = 1000
)
sdubai <- get_osm(smallbox, source = src)
str(sdubai)

# Larger box (6000 x 6000) around the same centre.
bigbox <- center_bbox(
  center_lon = 55.2708, center_lat = 25.2048,
  width = 6000, height = 6000
)
bdubai <- get_osm(bigbox, source = src)
str(bdubai)

# Distinct tag keys used on nodes and on ways, in sorted order.
node_tags <- sort(unique(bdubai$nodes$tags$k))
print(node_tags)
way_tags <- sort(unique(bdubai$ways$tags$k))
print(way_tags)

# Distinct users recorded on the nodes of the big extract.
users <- unique(bdubai$nodes$attrs$user)
print(users)

plot(bdubai)
# --- Small extract: pick out the layers to draw ---------------------------

# Nodes whose tag value is exactly "traffic_signals".
tss <- find(sdubai, node(tags(v == "traffic_signals")))
ts_sdubai <- subset(sdubai, node_ids = tss)

# Nodes whose tag value matches "busstop" via osmar's %agrep% operator.
bss <- find(sdubai, node(tags(v %agrep% "busstop")))
bs_sdubai <- subset(sdubai, node_ids = bss)

# Ways carrying a "highway" key, expanded with find_down() so the subset
# also contains the elements those ways reference.
highway_way_ids_small <- find(sdubai, way(tags(k == "highway")))
hws <- find_down(sdubai, way(highway_way_ids_small))
hw_sdubai <- subset(sdubai, ids = hws)

# Same procedure for ways carrying a "tunnel" key.
tunnel_way_ids_small <- find(sdubai, way(tags(k == "tunnel")))
tus <- find_down(sdubai, way(tunnel_way_ids_small))
tu_sdubai <- subset(sdubai, ids = tus)

# --- Draw: highways first, then overlay tunnels, signals and bus stops ----
plot_ways(hw_sdubai, col = "steelblue")
plot_ways(tu_sdubai, col = "magenta", add = TRUE)
plot_nodes(ts_sdubai, col = "red", add = TRUE)
plot_nodes(bs_sdubai, col = "blue", add = TRUE)
# --- Big extract: pick out the layers to draw -----------------------------

# Nodes whose tag value is exactly "traffic_signals".
ts <- find(bdubai, node(tags(v == "traffic_signals")))
ts_dubai <- subset(bdubai, node_ids = ts)

# Nodes whose tag value matches "busstop" via osmar's %agrep% operator.
bs <- find(bdubai, node(tags(v %agrep% "busstop")))
bs_dubai <- subset(bdubai, node_ids = bs)

# Ways carrying a "highway" key, expanded with find_down() so the subset
# also contains the elements those ways reference.
highway_way_ids_big <- find(bdubai, way(tags(k == "highway")))
hw <- find_down(bdubai, way(highway_way_ids_big))
hw_dubai <- subset(bdubai, ids = hw)

# Same procedure for ways carrying a "tunnel" key.
tunnel_way_ids_big <- find(bdubai, way(tags(k == "tunnel")))
tu <- find_down(bdubai, way(tunnel_way_ids_big))
tu_dubai <- subset(bdubai, ids = tu)

# --- Draw: highways first, then overlay tunnels, signals and bus stops ----
plot_ways(hw_dubai, col = "steelblue")
plot_ways(tu_dubai, col = "magenta", add = TRUE)
plot_nodes(ts_dubai, col = "red", add = TRUE)
plot_nodes(bs_dubai, col = "blue", add = TRUE)
# Number of colours available in the "Set3" palette.
brewer.pal.info["Set3", "maxcolors"]

# Ways carrying a "building" key, expanded with find_down() and converted
# to sp polygons.
building_way_ids <- find(bdubai, way(tags(k == "building")))
bg <- find_down(bdubai, way(building_way_ids))
bg_dubai <- subset(bdubai, ids = bg)
bg_poly <- as_sp(bg_dubai, "polygons")

# Colour the building polygons by their "version" attribute.
spplot(bg_poly, "version", col.regions = brewer.pal(12, "Set3"))

# bus <- find(bdubai, relation(tags(v == "bus")))
# bus_dubai <- lapply(bus, function(i) { as_sp(get_osm(relation(i), full = TRUE), "lines") })

# Bus stops as points and highways as lines, for the base-graphics overlay.
bs_points <- as_sp(bs_dubai, "points")
hw_line <- as_sp(hw_dubai, "lines")

# Buildings first, then highways and bus stops on top.
plot(bg_poly, col = "lightsteelblue")
plot(hw_line, col = "blue", add = TRUE)
plot(bs_points, col = "red", add = TRUE)
# for ( i in seq(along = bus_dubai) ) { plot(bus[[i]], add = TRUE, col = "blue") }
Another possible way is to download data files in many different formats from the website: https://mapzen.com/data/metro-extracts/metro/dubai_abu-dhabi/ . The files dubai_abu-dhabi.osm, dubai_abu-dhabi_buildings.geojson, etc. were downloaded. The data from the .osm file were then converted to CSV and JSON formats using specially designed functions written in Python.
Sizes of the downloaded OSM, JSON and CSV files:
# Size in bytes of each downloaded / converted data file, one value per
# call so every file gets its own output.
file.info("/Users/olgabelitskaya/large-repo/dubai_abu-dhabi.osm", extra_cols = FALSE)$size
file.info("/Users/olgabelitskaya/large-repo/dubai_abu-dhabi.osm.json", extra_cols = FALSE)$size
file.info("/Users/olgabelitskaya/large-repo/nodes.csv", extra_cols = FALSE)$size
file.info("/Users/olgabelitskaya/large-repo/nodes_tags.csv", extra_cols = FALSE)$size
file.info("/Users/olgabelitskaya/large-repo/ways.csv", extra_cols = FALSE)$size
file.info("/Users/olgabelitskaya/large-repo/ways_tags.csv", extra_cols = FALSE)$size
file.info("/Users/olgabelitskaya/large-repo/ways_nodes.csv", extra_cols = FALSE)$size
# Point osmar at the local .osm file (path is relative to the working
# directory).
source1 <- osmsource_file("dubai_abu-dhabi.osm")
# dubai1 <- get_osm(complete_file(), source=source1)

# Read the file through osmar's internal (unexported) reader instead of
# get_osm().
dubai2 <- osmar:::get_osm_data.osmfile(source1)

# Show the fifth element, then parse just that element as XML and view the
# resulting node attributes.
fifth_element <- dubai2[5]
fifth_element
parsed_element <- as_osmar(xmlParse(fifth_element))
parsed_element$nodes$attrs

# Fetch node 21133779 directly from the OSM API.
api_node <- get_osm(node(21133779), source = osmsource_api())
api_node$nodes$attrs
# Bounding box for the osmplotr map.
dad_box <- get_bbox(c(55.2408, 25.1548, 55.2808, 25.2148))

# Download buildings inside the box and print the returned object.
dad_buildings <- extract_osm_objects(bbox = dad_box, key = "building")
dad_buildings

# Download highways inside the same box.
dad_highways <- extract_osm_objects(bbox = dad_box, key = "highway")

# Build the map: light grey background, then buildings, then highways.
dad_map <- osm_basemap(bbox = dad_box, bg = "lightgrey")
dad_map <- add_osm_objects(map = dad_map, obj = dad_buildings, col = "darkblue")
dad_map <- add_osm_objects(map = dad_map, obj = dad_highways, col = "steelblue")
dad_map
# write.csv(dubai1$nodes$attrs, file = "rnodes.csv")
# file.size("rnodes.csv")
# write.csv(dubai1$nodes$tags, file = "rnodes_tags.csv")
# file.size("rnodes_tags.csv")
# write.csv(dubai1$ways$attrs, file = "rways.csv")
# file.size("rways.csv")
# write.csv(dubai1$ways$tags, file = "rways_tags.csv")
# file.size("rways_tags.csv")
# write.csv(dubai1$ways$refs, file = "rways_refs.csv")
# file.size("rways_refs.csv")
# write.csv(dubai1$relations$attrs, file = "rrelations.csv")
# file.size("rrelations.csv")
# write.csv(dubai1$relation$tags, file = "rrelations_tags.csv")
# file.size("rrelations_tags.csv")
# write.csv(dubai1$relation$refs, file = "rrelations_refs.csv")
# file.size("rrelations_refs.csv")
The following lines of code show how the data from the CSV files are written to the SQL database.
Variant #1
# Open (or create) the SQLite database file through DBI.
sqlite <- dbDriver("SQLite")
dubai_abu_dhabi <- dbConnect(drv = sqlite, dbname = "dubai_abu_dhabi.sqlite3")

# Load the five CSV extracts into data frames (paths are relative to the
# working directory).
nodes <- read.csv(file = "nodes.csv")
nodes_tags <- read.csv(file = "nodes_tags.csv")
ways <- read.csv(file = "ways.csv")
ways_tags <- read.csv(file = "ways_tags.csv")
ways_nodes <- read.csv(file = "ways_nodes.csv")

# One-off import of the data frames into the database, kept disabled.
# dbWriteTable(conn = dubai_abu_dhabi, name = 'nodes', value = nodes, row.names = FALSE)
# dbWriteTable(conn = dubai_abu_dhabi, name = 'nodes_tags', value = nodes_tags, row.names = FALSE)
# dbWriteTable(conn = dubai_abu_dhabi, name = 'ways', value = ways, row.names = FALSE)
# dbWriteTable(conn = dubai_abu_dhabi, name = 'ways_tags', value = ways_tags, row.names = FALSE)
# dbWriteTable(conn = dubai_abu_dhabi, name = 'ways_nodes', value = ways_nodes, row.names = FALSE)

# Inspect what the database currently holds.
dbListTables(conn = dubai_abu_dhabi)
dbListFields(conn = dubai_abu_dhabi, name = "nodes")
Variant #2
# Database name handed to every sqldf() preview below.
# NOTE(review): this is "dubai_abu_dhabi", not the "dubai_abu_dhabi.sqlite3"
# file opened with dbConnect() earlier in the script - confirm it is intended.
sqldf_db <- "dubai_abu_dhabi"

# The disabled read.csv.sql() lines are the one-off imports for each table;
# the live sqldf() line after each one previews its first three rows.
# sqldf("attach dubai_abu_dhabi as new")
# read.csv.sql("nodes.csv", sql = "create table nodes as select * from file", dbname = "dubai_abu_dhabi")
sqldf(x = "select * from nodes limit 3", dbname = sqldf_db)
# read.csv.sql("nodes_tags.csv", sql = "create table nodes_tags as select * from file", dbname = "dubai_abu_dhabi")
sqldf(x = "select * from nodes_tags limit 3", dbname = sqldf_db)
# read.csv.sql("ways.csv", sql = "create table ways as select * from file", dbname = "dubai_abu_dhabi")
sqldf(x = "select * from ways limit 3", dbname = sqldf_db)
# read.csv.sql("ways_tags.csv", sql = "create table ways_tags as select * from file", dbname = "dubai_abu_dhabi")
sqldf(x = "select * from ways_tags limit 3", dbname = sqldf_db)
# read.csv.sql("ways_nodes.csv", sql = "create table ways_nodes as select * from file", dbname = "dubai_abu_dhabi")
sqldf(x = "select * from ways_nodes limit 3", dbname = sqldf_db)

# Row-count queries, executed further below.
query001 <- "SELECT COUNT(*) FROM nodes;"
query002 <- "SELECT COUNT(*) FROM ways;"
The number of nodes:
# Run the stored row-count query for the nodes table.
sqldf(x = query001)
The number of ways:
# Run the stored row-count query for the ways table.
sqldf(x = query002)
The number of users:
# Count distinct uid values across nodes and ways combined.
distinct_users_sql <- "SELECT COUNT(DISTINCT(e.uid)) FROM (SELECT uid FROM nodes UNION ALL SELECT uid FROM ways) e;"
distinct_users <- sqldf(distinct_users_sql)
print(distinct_users)
The database allows us to evaluate the contribution of each individual user to map editing.
Let us list the 3 most active editors of this map section:
# Rows per user over nodes and ways combined; keep the three largest counts.
top_editors_sql <- "SELECT e.user, COUNT(*) as num \
FROM (SELECT user FROM nodes UNION ALL SELECT user FROM ways) e \
GROUP BY e.user \
ORDER BY num DESC \
LIMIT 3;"
formattable(sqldf(top_editors_sql))
A list of the 3 most common types of places:
# Node tags with key 'place', counted per value; keep the top three.
top_places_sql <- "SELECT value, COUNT(*) as num \
FROM nodes_tags \
WHERE key='place' \
GROUP BY value \
ORDER BY num DESC \
LIMIT 3;"
formattable(sqldf(top_places_sql))
A list of the 10 most common types of buildings:
# Node tags with key 'building', counted per value; keep the top ten.
top_buildings_sql <- "SELECT value, COUNT(*) as num \
FROM nodes_tags \
WHERE key='building' \
GROUP BY value \
ORDER BY num DESC \
LIMIT 10;"
formattable(sqldf(top_buildings_sql))
A list of the 10 most common facilities:
# Node tags with key 'amenity', counted per value; keep the top ten.
top_amenities_sql <- "SELECT value, COUNT(*) as num \
FROM nodes_tags \
WHERE key='amenity' \
GROUP BY value \
ORDER BY num DESC \
LIMIT 10;"
formattable(sqldf(top_amenities_sql))
A list of the 20 most common streets:
# Node tags with key 'street', counted per value; keep the top twenty.
formattable(sqldf("SELECT value, COUNT(*) as num \
FROM nodes_tags \
WHERE key='street' \
GROUP BY value \
ORDER BY num DESC \
LIMIT 20;"))

# End of the SQL section: close the DBI connection opened with dbConnect()
# earlier in the script. The call was commented out, which left the
# connection open for the rest of the session.
dbDisconnect(dubai_abu_dhabi)
With very similar manipulations we can import the data from JSON files into MongoDB.
# Run mongod from terminal
Let's explore the dataset with the 'mongolite' package.
Variant #1
# mongoDbConnect() comes from RMongo, whose library() call is commented out
# at the top of this script ("Not installed"), so the call below failed with
# "could not find function". Its result was never used, so it is disabled.
# mg1 <- mongoDbConnect('test')

# Open the "openstreetmap_correct" collection with mongolite.
m1 <- mongo("openstreetmap_correct", verbose = FALSE)

# Stream the JSON file in batches, inserting each batch (passed to the
# handler as a data frame) into the collection.
stream_in(
  file("/Users/olgabelitskaya/large-repo/dubai_abu-dhabi_postcode.osm.json"),
  handler = function(df) {
    m1$insert(df)
  }
)

# Number of documents in the collection after the import.
m1$count()
Variant #2
# Open the "openstreetmap" collection; the one-off import stays disabled.
m <- mongo(collection = "openstreetmap", verbose = FALSE)
# stream_in(file("dubai_abu-dhabi.osm.json"), handler = function(df){m$insert(df)})
The number of documents:
# Count every document in the collection (empty filter, mongolite's default).
m$count(query = '{}')
The three most active editors of this map section:
# Group documents by created.user, sort by count descending, keep three.
top_editors_pipeline <- '[
{ "$group" : {"_id" : "$created.user", "count" : { "$sum" : 1} } },
{ "$sort" : {"count" : -1} }, { "$limit" : 3 }
]'
m$aggregate(pipeline = top_editors_pipeline)
The number of users with one note and the list of 10 users with only one note:
# Count documents per user, then count users per document-count, and keep
# the row for the smallest document-count.
users_by_count_pipeline <- '[
{ "$group" : {"_id" : "$created.user", "count" : { "$sum" : 1} } },
{ "$group" : {"_id" : "$count", "num_users": { "$sum" : 1} } },
{ "$sort" : {"_id" : 1} }, { "$limit" : 1}
]'
m$aggregate(pipeline = users_by_count_pipeline)

# Ten users with the fewest documents (count ascending).
least_active_pipeline <- '[
{ "$group" : {"_id" : "$created.user", "count" : { "$sum" : 1} } },
{ "$sort" : {"count" : 1} }, { "$limit" : 10 }
]'
m$aggregate(pipeline = least_active_pipeline)
The list of 3 most common places:
# Documents that have address.place, grouped by its value; keep the top three.
top_places_pipeline <- '[
{ "$match" : { "address.place" : { "$exists" : 1} } },
{ "$group" : { "_id" : "$address.place", "count" : { "$sum" : 1} } },
{ "$sort" : { "count" : -1}}, {"$limit":3}
]'
m$aggregate(pipeline = top_places_pipeline)
The list of 10 most common types of buildings:
# Documents that have a building field, grouped by its value; keep the top ten.
top_buildings_pipeline <- '[
{ "$match": { "building": { "$exists": 1}}},
{ "$group": { "_id": "$building", "count": { "$sum": 1}}},
{ "$sort": { "count": -1}}, {"$limit": 10}
]'
m$aggregate(pipeline = top_buildings_pipeline)
The list of 10 most common facilities:
# Documents that have an amenity field, grouped by its value; keep the top ten.
top_amenities_pipeline <- '[
{ "$match": { "amenity": { "$exists": 1}}},
{ "$group": { "_id": "$amenity", "count": { "$sum": 1}}},
{ "$sort": { "count": -1}}, { "$limit": 10}
]'
m$aggregate(pipeline = top_amenities_pipeline)
The list of 3 most common zipcodes:
# Documents that have address.postcode, grouped by its value; keep the top three.
top_postcodes_pipeline <- '[
{ "$match" : { "address.postcode" : { "$exists" : 1} } },
{ "$group" : { "_id" : "$address.postcode", "count" : { "$sum" : 1} } },
{ "$sort" : { "count" : -1}}, {"$limit": 3}
]'
m$aggregate(pipeline = top_postcodes_pipeline)
Counting zipcodes with one document:
# Count documents per postcode, then count postcodes per document-count,
# and keep the row for the smallest document-count.
postcodes_by_count_pipeline <- ' [
{ "$group" : {"_id" : "$address.postcode", "count" : { "$sum" : 1} } },
{ "$group" : {"_id" : "$count", "count": { "$sum" : 1} } },
{ "$sort" : {"_id" : 1} }, { "$limit" : 1}
]'
m$aggregate(pipeline = postcodes_by_count_pipeline)
Some examples of statistics indicators for this dataset:
# Fetch the collection statistics once and print the four fields of interest.
collection_stats <- m$info()$stats
collection_stats$ns
collection_stats$size
collection_stats$avgObjSize
collection_stats$storageSize
One of the main problems of public maps is that place names are not fully duplicated in other languages. If it were possible to automate the translation process by building up a common database of map names in many languages, it would save users from many difficulties and mistakes.
The next problem is the existence of a large number of databases (including mapping ones) describing the same map objects. Some procedures for integrating the already available data would relieve a lot of people of unnecessary work and save time and effort.
Obviously, the information about the number of buildings and their purpose is incomplete. The completeness of public maps can be increased by bringing new users into the mapping process. To this end, entering information should be as simple as possible: for example, choosing from the available options with automatic filling of many fields for linked options (for example, linking the name of the street and the administrative area in which it is located).
There are a number of mistakes and typos, as in any public data. Well-known methods can be proposed for correcting them: automatic comparison with existing data and verification of new data by other users.
The lack of a uniform postal code system in this concrete dataset complicates their identification and verification.
While working on the project, I spent a lot of time converting one type of data file to another. Each format has its own advantages and disadvantages. Probably, it is possible to design a universal file type that allows us to store data of any kind, combines the advantages of all existing types and is applicable in most existing programming languages.
It seems appropriate to me to correct errors in the data after uploading the files to the database. Sometimes a record that looks like a mistake in terms of filling in a particular type of data simply contains additional information about geo-objects.
1) nodes - points in space with basic characteristics (lat, long, id, tags);
2) ways - defining linear features and area boundaries (an ordered list of nodes);
3) relations - tags and also an ordered list of nodes, ways and/or relations as members which is used to define logical or geographic relationships between other elements.
1) Size of the .osm file: 394,4 MB.
2) Size of the .osm sample file : 3,9 MB.
3) Nodes: 1890178.
4) Ways: 234327.
5) Relations: 2820.
6) Tags: 503027.
7) Users: 1895.
With the help of a specific set of commands we can perform a statistical description of the data collections and the databases.
I think this project is educational for me. I believe that one of the main tasks in this case was to study the methods of extraction and researching of map data in open access. For example, I used a systematic sample of elements from the original .osm file for trying functions of processing before applying them to the whole dataset. As a result I have some new useful skills in parsing, processing, storing, aggregating and applying the data.
In the course of the research I read through quite a lot of projects by other students on this topic. After my own research and a review of the results of other authors, I have formed a definite opinion about the ideas in OpenStreetMap.
This website can be viewed as a testing ground for the interaction of a large number of people (including non-professionals) to create a unified information space. The prospects of such cooperation cannot be overemphasized. The success of the project will make it possible to implement ambitious plans in the field of accessible information technologies, the creation of virtual reality and many other areas.
An increase in the number of users leads to many positive effects in this kind of project:
1) a rapid improvement in the accuracy, completeness and timeliness of information;
2) bringing the information space closer to reality and making the data evaluation more objective;
3) a reduction in the effort needed to cleanse the data of erroneous details.
Ideas for improving the project OpenStreetMap are simple and natural.
Increasing the number of users can be achieved by additional options like marks of the rating evaluation (eg, the best restaurant or the most convenient parking).
The popularity of the project may also grow thanks to temporary pop-up messages from users (displayed for no more than 1-3 hours) with current information about a geographic location (e.g., the presence of traffic jams).